diff --git a/lassie/apps/lassie.py b/lassie/apps/lassie.py index 84a946f1..8add8e89 100644 --- a/lassie/apps/lassie.py +++ b/lassie/apps/lassie.py @@ -7,7 +7,6 @@ from datetime import datetime from pathlib import Path -import nest_asyncio from pkg_resources import get_distribution from lassie.console import console @@ -19,8 +18,6 @@ from lassie.station_corrections import StationCorrections from lassie.utils import CACHE_DIR, setup_rich_logging -nest_asyncio.apply() - logger = logging.getLogger(__name__) diff --git a/lassie/octree.py b/lassie/octree.py index a07519ce..51c2fa84 100644 --- a/lassie/octree.py +++ b/lassie/octree.py @@ -359,6 +359,27 @@ def smallest_node_size(self) -> float: size /= 2 return size + def n_levels(self) -> int: + """Returns the number of levels in the octree. + + Returns: + int: Number of levels. + """ + levels = 0 + size = self.size_initial + while size >= self.size_limit * 2: + levels += 1 + size /= 2 + return levels + + def total_number_nodes(self) -> int: + """Returns the total number of nodes of all levels. + + Returns: + int: Total number of nodes. + """ + return len(self._root_nodes) * (8 ** self.n_levels()) + def maximum_number_nodes(self) -> int: """Returns the maximum number of nodes. diff --git a/lassie/search/squirrel.py b/lassie/search/squirrel.py index 6bdaa8b7..aa3000c4 100644 --- a/lassie/search/squirrel.py +++ b/lassie/search/squirrel.py @@ -8,7 +8,14 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Deque, Iterator -from pydantic import AwareDatetime, Field, PositiveInt, PrivateAttr, field_validator +from pydantic import ( + AwareDatetime, + Field, + PositiveInt, + PrivateAttr, + constr, + field_validator, +) from pyrocko.squirrel import Squirrel from lassie.features import FeatureExtractors @@ -49,6 +56,7 @@ async def prefetch_worker(self) -> None: class SquirrelSearch(Search): time_span: tuple[AwareDatetime | None, AwareDatetime | None] = (None, None) squirrel_environment: Path = Path(".") + channel_selector: constr(max_length=3) = "*" waveform_data: list[Path] waveform_prefetch_batches: PositiveInt = 4 @@ -131,7 +139,9 @@ async def scan_squirrel(self) -> None: tinc=window_increment.total_seconds(), tpad=self.window_padding.total_seconds(), want_incomplete=False, - codes=[(*nsl, "*") for nsl in self.stations.get_all_nsl()], + codes=[ + (*nsl, self.channel_selector) for nsl in self.stations.get_all_nsl() + ], ) prefetcher = SquirrelPrefetcher(iterator, self.waveform_prefetch_batches) diff --git a/lassie/tracers/cake.py b/lassie/tracers/cake.py index 4a7535b5..2659c1fa 100644 --- a/lassie/tracers/cake.py +++ b/lassie/tracers/cake.py @@ -503,7 +503,7 @@ async def prepare(self, octree: Octree, stations: Stations) -> None: LRU_CACHE_SIZE = int(self.lut_cache_size / bytes_per_node / n_trees) # TODO: This should be total number nodes. Not only leaf nodes. - node_cache_fraction = LRU_CACHE_SIZE / octree.maximum_number_nodes() + node_cache_fraction = LRU_CACHE_SIZE / octree.total_number_nodes() logging.info( "limiting traveltime LUT size to %d nodes (%s)," " caching %.1f%% of possible octree nodes", @@ -515,7 +515,7 @@ async def prepare(self, octree: Octree, stations: Stations) -> None: cached_trees = [ TravelTimeTree.load(file) for file in self.cache_dir.glob("*.sptree") ] - logger.debug("loaded %d cached traveltime trees", len(cached_trees)) + logger.debug("loaded %d cached travel time trees", len(cached_trees)) distances = octree.distances_stations(stations) source_depths = np.asarray(octree.depth_bounds) diff --git a/lassie/tracers/fast_marching/fast_marching.py b/lassie/tracers/fast_marching/fast_marching.py index 04a79ed1..48c81555 100644 --- a/lassie/tracers/fast_marching/fast_marching.py +++ b/lassie/tracers/fast_marching/fast_marching.py @@ -312,6 +312,19 @@ async def prepare( reason=f"outside fast-marching velocity model, offset {offset}", ) + for station in stations: + velocity_station = velocity_model.get_velocity(station) + if velocity_station < 0.0: + raise ValueError( + f"station {station.pretty_nsl} has negative velocity" + f" {velocity_station}" + ) + logger.info( + "velocity at station %s: %.1f m/s", + station.pretty_nsl, + velocity_station, + ) + nodes_covered = [ node for node in octree if velocity_model.is_inside(node.as_location()) ] @@ -319,8 +332,9 @@ async def prepare( raise ValueError("no octree node is inside the velocity model") logger.info( - "%d%% octree nodes are inside the velocity model", + "%d%% octree nodes are inside the %s velocity model", len(nodes_covered) / octree.n_nodes * 100, + self.phase, ) self._cached_stations = stations @@ -332,7 +346,7 @@ async def prepare( self._node_lut = LRU(lru_cache_size) # TODO: This should be total number nodes. Not only leaf nodes. - node_cache_fraction = lru_cache_size / octree.maximum_number_nodes() + node_cache_fraction = lru_cache_size / octree.total_number_nodes() logging.info( "limiting traveltime LUT size to %d nodes (%s)," " caching %.1f%% of possible octree nodes", @@ -364,7 +378,9 @@ def _load_cached_tavel_times(self, cache_dir: Path) -> None: continue volumes[travel_times.station.location_hash()] = travel_times - logger.info("loaded %d travel times volumes from cache", len(volumes)) + logger.info( + "loaded %d travel times volumes for %s from cache", len(volumes), self.phase + ) self._travel_time_volumes.update(volumes) async def _calculate_travel_times( diff --git a/lassie/tracers/fast_marching/velocity_models.py b/lassie/tracers/fast_marching/velocity_models.py index ac261592..e97931d4 100644 --- a/lassie/tracers/fast_marching/velocity_models.py +++ b/lassie/tracers/fast_marching/velocity_models.py @@ -22,7 +22,6 @@ from lassie.models.location import Location if TYPE_CHECKING: - from lassie.models.station import Station from lassie.octree import Octree @@ -89,25 +88,71 @@ def velocity_model(self) -> np.ndarray: return self._velocity_model def hash(self) -> str: + """Return hash of velocity model. + + Returns: + str: The hash. + """ if self._hash is None: sha1_hash = sha1(self._velocity_model.tobytes()) self._hash = sha1_hash.hexdigest() return self._hash - def get_source_arrival_grid(self, station: Station) -> np.ndarray: - if not self.is_inside(station): - raise ValueError("Station is outside of velocity model.") + def _get_location_indices(self, location: Location) -> tuple[int, int, int]: + """Return indices of location in velocity model, by nearest neighbor. - times = np.full_like(self.velocity_model, fill_value=-1.0) + Args: + location (Location): The location. - station_offset = station.offset_to(self.center) + Returns: + tuple[int, int, int]: The indices as (east, north, depth). + """ + if not self.is_inside(location): + raise ValueError("Location is outside of velocity model.") + station_offset = location.offset_to(self.center) east_idx = np.argmin(np.abs(self._east_coords - station_offset[0])) north_idx = np.argmin(np.abs(self._north_coords - station_offset[1])) depth_idx = np.argmin(np.abs(self._depth_coords - station_offset[2])) + return int(east_idx), int(north_idx), int(depth_idx) + + def get_velocity(self, location: Location) -> float: + """Return velocity at location in [m/s], nearest neighbor. + + Args: + location (Location): The location. + + Returns: + float: The velocity in m/s. + """ + east_idx, north_idx, depth_idx = self._get_location_indices(location) + return self.velocity_model[east_idx, north_idx, depth_idx] + + def get_source_arrival_grid(self, location: Location) -> np.ndarray: + """Return travel times grid for Eikonal for specific. + + The initial travel time grid is filled with -1.0, except for the source + location, which is set to 0.0 s. + + Args: + location (Location): The location. + + Returns: + np.ndarray: The initial travel times grid. + """ + times = np.full_like(self.velocity_model, fill_value=-1.0) + east_idx, north_idx, depth_idx = self._get_location_indices(location) times[east_idx, north_idx, depth_idx] = 0.0 return times def is_inside(self, location: Location) -> bool: + """Return True if location is inside velocity model. + + Args: + location (Location): The location. + + Returns: + bool: True if location is inside velocity model. + """ offset_to_center = location.offset_to(self.center) return ( self.east_bounds[0] <= offset_to_center[0] <= self.east_bounds[1] @@ -116,6 +161,12 @@ def is_inside(self, location: Location) -> bool: ) def get_meshgrid(self) -> list[np.ndarray]: + """Return meshgrid of velocity model coordinates. + + Returns: + list[np.ndarray]: The meshgrid as list of numpy arrays for east, north, + depth. + """ return np.meshgrid( self._east_coords, self._north_coords, @@ -128,6 +179,16 @@ def resample( grid_spacing: float, method: Literal["nearest", "linear", "cubic"] = "linear", ) -> Self: + """Resample velocity model to new grid spacing. + + Args: + grid_spacing (float): The new grid spacing in [m]. + method (Literal['nearest', 'linear', 'cubic'], optional): Interpolation + method. Defaults to "linear". + + Returns: + Self: A new, resampled velocity model. + """ if grid_spacing == self.grid_spacing: return self @@ -215,6 +276,20 @@ def from_header_file( file: Path, reference_location: Location | None = None, ) -> Self: + """Load NonLinLoc velocity model header file. + + Args: + file (Path): Path to NonLinLoc model header file. + reference_location (Location | None, optional): relative location of + NonLinLoc model, used for models with relative coordinates. + Defaults to None. + + Raises: + ValueError: If grid spacing is not equal in all dimensions. + + Returns: + Self: The header. + """ logger.info("loading NonLinLoc velocity model header file %s", file) header_text = file.read_text().split("\n")[0] header_text = re.sub(r"\s+", " ", header_text) # remove excessive spaces @@ -284,6 +359,11 @@ def depth_bounds(self) -> tuple[float, float]: @property def center(self) -> Location: + """Return center location of velocity model. + + Returns: + Location: The center location of the grid. + """ center = self.origin.model_copy(deep=True) center.east_shift += self.delta_x * self.nx / 2 center.north_shift += self.delta_y * self.ny / 2 diff --git a/pyproject.toml b/pyproject.toml index 14ba8474..0cd343bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,14 +30,13 @@ dependencies = [ "numpy>=1.17.3", "scipy>=1.8.0", "pyrocko>=2022.06.10", - "seisbench>=0.4.0", + "seisbench>=0.5.0", "pydantic>=2.3", "aiohttp>=3.8", "aiohttp_cors>=0.7.0", "typing-extensions>=4.6", "lru-dict>=1.2", "rich>=13.4", - "nest-asyncio>=1.5", # wait for seisbench merge https://github.com/seisbench/seisbench/pull/214 ] classifiers = [ diff --git a/test/conftest.py b/test/conftest.py index 36edbfb5..416b186b 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,11 +1,14 @@ +import asyncio import random from datetime import timedelta from pathlib import Path from tempfile import TemporaryDirectory from typing import Generator +import aiohttp import numpy as np import pytest +from rich.progress import Progress from lassie.models.detection import EventDetection, EventDetections from lassie.models.location import Location @@ -16,6 +19,14 @@ DATA_DIR = Path(__file__).parent / "data" +DATA_URL = "https://data.pyrocko.org/testing/lassie-v2/" +DATA_FILES = { + "FORGE_3D_5_large.P.mod.hdr", + "FORGE_3D_5_large.P.mod.buf", + "FORGE_3D_5_large.S.mod.hdr", + "FORGE_3D_5_large.S.mod.buf", +} + KM = 1e3 @@ -43,6 +54,38 @@ def travel_time_tree() -> TravelTimeTree: @pytest.fixture(scope="session") def data_dir() -> Path: + if not DATA_DIR.exists(): + DATA_DIR.mkdir() + + async def download_data(): + download_files = DATA_FILES.copy() + for filename in DATA_FILES: + filepath = DATA_DIR / filename + if filepath.exists(): + download_files.remove(filename) + + if not download_files: + return + + async with aiohttp.ClientSession() as session: + for filename in download_files: + filepath = DATA_DIR / filename + url = DATA_URL + filename + with Progress() as progress: + async with session.get(url) as response: + task = progress.add_task( + f"Downloading {url}", + total=response.content_length, + ) + with filepath.open("wb") as f: + while True: + chunk = await response.content.read(1024) + if not chunk: + break + f.write(chunk) + progress.advance(task, len(chunk)) + + asyncio.run(download_data()) return DATA_DIR diff --git a/test/test_fast_marching.py b/test/test_fast_marching.py index 12f12bd6..4d481df7 100644 --- a/test/test_fast_marching.py +++ b/test/test_fast_marching.py @@ -43,6 +43,7 @@ def stations_inside( depth=model.center.depth + (depth if depth is not None else rng.uniform(*model.depth_bounds)), ) + station = station.shifted_origin() stations.append(station) return Stations(stations=stations) @@ -87,7 +88,7 @@ async def test_load_save( @pytest.mark.asyncio -async def test_load_interpolation( +async def test_travel_time_interpolation( station_travel_times: StationTravelTimeVolume, octree: Octree, ) -> None: @@ -105,13 +106,25 @@ async def test_load_interpolation( analytical_travel_times = np.array(source_distances) / CONSTANT_VELOCITY nan_travel_times = np.isnan(eikonal_travel_times) + + assert np.any(~nan_travel_times) np.testing.assert_almost_equal( eikonal_travel_times[~nan_travel_times], analytical_travel_times[~nan_travel_times], decimal=1, ) - station_travel_times.interpolate_nodes(octree) + eikonal_travel_times = station_travel_times.interpolate_nodes( + octree, method="cubic" + ) + + nan_travel_times = np.isnan(eikonal_travel_times) + assert np.any(~nan_travel_times) + np.testing.assert_almost_equal( + eikonal_travel_times[~nan_travel_times], + analytical_travel_times[~nan_travel_times], + decimal=1, + ) @pytest.mark.asyncio diff --git a/test/upload_data.sh b/test/upload_data.sh new file mode 100755 index 00000000..b2884ebf --- /dev/null +++ b/test/upload_data.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo "Uploading test data to data.pyrocko.org" +scp data/* pyrocko-www@data.pyrocko.org:/srv/data.pyrocko.org/www/testing/lassie-v2