diff --git a/datashader/tests/test_tiles.py b/datashader/tests/test_tiles.py index 807c9e6c6..debe1d539 100644 --- a/datashader/tests/test_tiles.py +++ b/datashader/tests/test_tiles.py @@ -18,6 +18,8 @@ MERCATOR_CONST = 20037508.34 df = None + + def mock_load_data_func(x_range, y_range): global df if df is None: @@ -89,39 +91,39 @@ def assert_is_numeric(value): assert any([is_int_or_float, is_numpy_int_or_float]) - def test_get_super_tile_min_max(): - tile_info = {'level': 0, - 'x_range': (-MERCATOR_CONST, MERCATOR_CONST), - 'y_range': (-MERCATOR_CONST, MERCATOR_CONST), - 'tile_size': 256, - 'span': (0, 1000)} + 'x_range': (-MERCATOR_CONST, MERCATOR_CONST), + 'y_range': (-MERCATOR_CONST, MERCATOR_CONST), + 'tile_size': 256, + 'span': (0, 1000)} - agg = _get_super_tile_min_max(tile_info, mock_load_data_func, mock_rasterize_func) + tile_info = _get_super_tile_min_max(tile_info, mock_load_data_func, mock_rasterize_func) - result = [np.nanmin(agg.data), np.nanmax(agg.data)] + result = [tile_info['span_min'], tile_info['span_max']] assert isinstance(result, list) assert len(result) == 2 assert_is_numeric(result[0]) assert_is_numeric(result[1]) + def test_calculate_zoom_level_stats_with_fullscan_ranging_strategy(): full_extent = (-MERCATOR_CONST, -MERCATOR_CONST, MERCATOR_CONST, MERCATOR_CONST) level = 0 color_ranging_strategy = 'fullscan' super_tiles, span = calculate_zoom_level_stats(list(gen_super_tiles(full_extent, level)), - mock_load_data_func, - mock_rasterize_func, - color_ranging_strategy=color_ranging_strategy) + mock_load_data_func, + mock_rasterize_func, + color_ranging_strategy=color_ranging_strategy) assert isinstance(span, (list, tuple)) assert len(span) == 2 assert_is_numeric(span[0]) assert_is_numeric(span[1]) + def test_meters_to_tile(): # Part of NYC (used in taxi demo) full_extent_of_data = (-8243206.93436, 4968192.04221, -8226510.539480001, 4982886.20438) @@ -129,4 +131,4 @@ def test_meters_to_tile(): zoom = 12 tile_def = MercatorTileDefinition((xmin, xmax), (ymin, ymax), tile_size=256) tile = tile_def.meters_to_tile(xmin, ymin, zoom) - assert tile == (1205, 1540) # using Google tile coordinates, not TMS + assert tile == (1205, 1540) # using Google tile coordinates, not TMS diff --git a/datashader/tiles.py b/datashader/tiles.py index eee58acd3..8e122707a 100644 --- a/datashader/tiles.py +++ b/datashader/tiles.py @@ -3,14 +3,21 @@ import math import os +import sqlite3 +import uuid -import dask import dask.bag as db - import numpy as np - +import xarray from PIL.Image import fromarray +from .utils import meters_to_lnglat + +try: + import netCDF4 +except Exception: + netCDF4 = None + __all__ = ['render_tiles', 'MercatorTileDefinition'] @@ -25,34 +32,53 @@ def _create_dir(path): raise -def _get_super_tile_min_max(tile_info, load_data_func, rasterize_func): - tile_size = tile_info['tile_size'] +def _get_super_tile_min_max(tile_info, load_data_func, rasterize_func, local_cache_path=None): df = load_data_func(tile_info['x_range'], tile_info['y_range']) agg = rasterize_func(df, x_range=tile_info['x_range'], y_range=tile_info['y_range'], - height=tile_size, width=tile_size) - return agg + height=tile_info['tile_size'], width=tile_info['tile_size']) + tile_info['agg_type'] = agg.dtype.kind + tile_info['span_min'] = np.nanmin(agg.data) + tile_info['span_max'] = np.nanmax(agg.data) + + if local_cache_path: + cache_file_path = os.path.join(local_cache_path, 'super_tile_' + str(uuid.uuid4()) + '.nc') + agg.to_netcdf(cache_file_path, engine='netcdf4', format='NETCDF4') + tile_info['cache_file'] = cache_file_path + + return tile_info def calculate_zoom_level_stats(super_tiles, load_data_func, rasterize_func, - color_ranging_strategy='fullscan'): + color_ranging_strategy='fullscan', local_cache_path=None): if color_ranging_strategy == 'fullscan': - stats = [] + + super_tiles = db.from_sequence(super_tiles).map(_get_super_tile_min_max, load_data_func, + rasterize_func, local_cache_path).compute() + + span_min = None + span_max = None is_bool = False + for super_tile in super_tiles: - agg = _get_super_tile_min_max(super_tile, load_data_func, rasterize_func) - super_tile['agg'] = agg - if agg.dtype.kind == 'b': + if super_tile['agg_type'] == 'b': is_bool = True else: - stats.append(np.nanmin(agg.data)) - stats.append(np.nanmax(agg.data)) + if span_min is None: + span_min = super_tile['span_min'] + else: + span_min = min(span_min, super_tile['span_min']) + + if span_max is None: + span_max = super_tile['span_max'] + else: + span_max = max(span_max, super_tile['span_max']) + if is_bool: span = (0, 1) else: - b = db.from_sequence(stats) - span = dask.compute(b.min(), b.max()) + span = (span_min, span_max) return super_tiles, span else: raise ValueError('Invalid color_ranging_strategy option') @@ -60,16 +86,22 @@ def calculate_zoom_level_stats(super_tiles, load_data_func, def render_tiles(full_extent, levels, load_data_func, rasterize_func, shader_func, - post_render_func, output_path, color_ranging_strategy='fullscan'): + post_render_func, output_path, color_ranging_strategy='fullscan', + local_cache_path=None): results = dict() + + _setup(full_extent, levels, output_path, local_cache_path) + for level in levels: print('calculating statistics for level {}'.format(level)) super_tiles, span = calculate_zoom_level_stats(list(gen_super_tiles(full_extent, level)), load_data_func, rasterize_func, - color_ranging_strategy=color_ranging_strategy) + color_ranging_strategy=color_ranging_strategy, + local_cache_path=local_cache_path) print('rendering {} supertiles for zoom level {} with span={}'.format(len(super_tiles), level, span)) b = db.from_sequence(super_tiles) - b.map(render_super_tile, span, output_path, shader_func, post_render_func).compute() + b.map(render_super_tile, span, output_path, load_data_func, rasterize_func, shader_func, post_render_func, + local_cache_path).compute() results[level] = dict(success=True, stats=span, supertile_count=len(super_tiles)) return results @@ -80,8 +112,8 @@ def gen_super_tiles(extent, zoom_level, span=None): super_tile_size = min(2 ** 4 * 256, (2 ** zoom_level) * 256) super_tile_def = MercatorTileDefinition(x_range=(xmin, xmax), y_range=(ymin, ymax), tile_size=super_tile_size) - super_tiles = super_tile_def.get_tiles_by_extent(extent, zoom_level) - for s in super_tiles: + + for s in super_tile_def.get_tiles_by_extent(extent, zoom_level): st_extent = s[3] x_range = (st_extent[0], st_extent[2]) y_range = (st_extent[1], st_extent[3]) @@ -92,30 +124,60 @@ def gen_super_tiles(extent, zoom_level, span=None): 'span': span} -def render_super_tile(tile_info, span, output_path, shader_func, post_render_func): - level = tile_info['level'] - ds_img = shader_func(tile_info['agg'], span=span) - return create_sub_tiles(ds_img, level, tile_info, output_path, post_render_func) +def render_super_tile(tile_info, span, output_path, load_data_func, rasterize_func, shader_func, post_render_func, + local_cache_path): + agg = None + + if local_cache_path is not None: + agg = xarray.load_dataarray(tile_info['cache_file'], engine='netcdf4') + os.remove(tile_info['cache_file']) + else: + df = load_data_func(tile_info['x_range'], tile_info['y_range']) + agg = rasterize_func(df, x_range=tile_info['x_range'], + y_range=tile_info['y_range'], + height=tile_info['tile_size'], width=tile_info['tile_size']) + + ds_img = shader_func(agg, span=span) + + return create_sub_tiles(ds_img, tile_info, output_path, post_render_func) -def create_sub_tiles(data_array, level, tile_info, output_path, post_render_func=None): - # validate / createoutput_dir - _create_dir(output_path) +def _setup(full_extent, levels, output_path, local_cache_path): + if os.path.splitext(output_path)[1] == '.mbtiles': + output_dir = os.path.dirname(output_path) + + if output_dir: + _create_dir(output_dir) + + # Create mbtiles file and setup sqlite tables. + MapboxTileRenderer.setup(output_path, full_extent, levels[0], levels[len(levels) - 1]) + else: + _create_dir(output_path) + + if local_cache_path: + assert netCDF4, 'netcdf4 library must be installed for use with local_cache.' + _create_dir(local_cache_path) + + +def create_sub_tiles(data_array, tile_info, output_path, post_render_func=None): # create tile source tile_def = MercatorTileDefinition(x_range=tile_info['x_range'], y_range=tile_info['y_range'], tile_size=256) # create Tile Renderer - if output_path.startswith('s3:'): + if output_path.endswith("mbtiles"): + renderer = MapboxTileRenderer(tile_def, output_location=output_path, + post_render_func=post_render_func) + elif output_path.startswith('s3:'): renderer = S3TileRenderer(tile_def, output_location=output_path, post_render_func=post_render_func) else: renderer = FileSystemTileRenderer(tile_def, output_location=output_path, post_render_func=post_render_func) - return renderer.render(data_array, level=level) + return renderer.render(data_array, level=tile_info['level']) def invert_y_tile(y, z): @@ -260,15 +322,11 @@ def get_tiles_by_extent(self, extent, level): txmin, tymax = self.meters_to_tile(xmin, ymin, level) txmax, tymin = self.meters_to_tile(xmax, ymax, level) - # TODO: vectorize? - tiles = [] for ty in range(tymin, tymax + 1): for tx in range(txmin, txmax + 1): if self.is_valid_tile(tx, ty, level): t = (tx, ty, level, self.get_tile_meters(tx, ty, level)) - tiles.append(t) - - return tiles + yield t def get_tile_meters(self, tx, ty, level): ty = invert_y_tile(ty, level) # convert to TMS for conversion to meters @@ -296,11 +354,11 @@ def render(self, da, level): ymin, ymax = self.tile_def.y_range extent = xmin, ymin, xmax, ymax - tiles = self.tile_def.get_tiles_by_extent(extent, level) - for t in tiles: + for t in self.tile_def.get_tiles_by_extent(extent, level): x, y, z, data_extent = t dxmin, dymin, dxmax, dymax = data_extent - arr = da.loc[{'x': slice(dxmin, dxmax), 'y': slice(dymin, dymax)}] + + arr = da.loc[{da.dims[1]: slice(dxmin, dxmax), da.dims[0]: slice(dymin, dymax)}] if 0 in arr.shape: continue @@ -407,3 +465,83 @@ def render(self, da, level): client.put_object(Body=output_buf, Bucket=bucket, Key=key, ACL='public-read') return 'https://{}.s3.amazonaws.com/{}'.format(bucket, s3_info.path) + + +class MapboxTileRenderer(TileRenderer): + + @staticmethod + def setup(output_location, full_extent, min_zoom, max_zoom, tile_format="PNG"): + con = sqlite3.connect(output_location) + cur = con.cursor() + + # Create MBTiles tables. + cur.execute(""" + create table if not exists tiles ( + zoom_level integer, + tile_column integer, + tile_row integer, + tile_data blob); + """) + cur.execute("""create table if not exists metadata + (name text, value text);""") + cur.execute("""create unique index if not exists name on metadata (name);""") + cur.execute("""create unique index if not exists tile_index on tiles + (zoom_level, tile_column, tile_row);""") + + # Compute Extents in WGS84 + min_lon, min_lat = meters_to_lnglat(full_extent[0], full_extent[1]) + max_lon, max_lat = meters_to_lnglat(full_extent[2], full_extent[3]) + + # Compute the Center & Zoom Level + lat_diff = max_lat - min_lat + lon_diff = max_lon - min_lon + + lat_center = min_lat + (lat_diff / 2) + lon_center = min_lon + (lon_diff / 2) + + max_diff = max(lon_diff, lat_diff) + zoom_level = None + if max_diff < (360.0 / math.pow(2, 20)): + zoom_level = 21 + else: + zoom_level = int(-1 * ((math.log(max_diff) / math.log(2.0)) - (math.log(360.0) / math.log(2)))) + if zoom_level < 1: + zoom_level = 1 + + # Setup MBTiles metadata table. + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("name", os.path.splitext(os.path.basename(output_location))[0])) + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("format", tile_format.lower())) + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("bounds", "{},{},{},{}".format(min_lon, min_lat, max_lon, max_lat))) + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("center", "{},{},{}".format(lon_center, lat_center, zoom_level))) + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("minzoom", min_zoom)) + cur.execute("""insert or replace into metadata (name, value) values (?, ?);""", + ("maxzoom", max_zoom)) + + cur.close() + con.commit() + con.close() + + def render(self, da, level): + con = sqlite3.connect(self.output_location, isolation_level=None) + cur = con.cursor() + + for img, x, y, z in super(MapboxTileRenderer, self).render(da, level): + image_bytes = BytesIO() + img.save(image_bytes, self.tile_format) + image_bytes.seek(0) + + tile_row = (2 ** z) - 1 - y + + cur.execute("""insert or replace into tiles (zoom_level, + tile_column, tile_row, tile_data) values + (?, ?, ?, ?);""", + (z, x, tile_row, sqlite3.Binary(image_bytes.getvalue()))) + + cur.close() + con.commit() + con.close() diff --git a/datashader/utils.py b/datashader/utils.py index c30bac6a1..8af9c1877 100644 --- a/datashader/utils.py +++ b/datashader/utils.py @@ -395,6 +395,32 @@ def lnglat_to_meters(longitude, latitude): return (easting, northing) +def meters_to_lnglat(easting, northing): + """ + Projects the given easting, northing values into + longitude, latitude coordinates. + easting and northing values are assumed to be in Web Mercator + (aka Pseudo-Mercator or EPSG:3857) coordinates. + Args: + easting + northing + Returns: + (longitude, latitude) + """ + if isinstance(easting, (list, tuple)): + easting = np.array(easting) + if isinstance(northing, (list, tuple)): + northing = np.array(northing) + + origin_shift = np.pi * 6378137 + longitude = easting * 180.0 / origin_shift + with np.errstate(divide='ignore'): + latitude = np.arctan( + np.exp(northing * np.pi / origin_shift) + ) * 360.0 / np.pi - 90 + return longitude, latitude + + # Heavily inspired by odo def dshape_from_pandas_helper(col): """Return an object from datashape.coretypes given a column from a pandas