-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
821 additions
and
0 deletions.
There are no files selected for viewing
146 changes: 146 additions & 0 deletions
146
000458/FlatironInstitute/000_lindi_vs_fsspec_streaming.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/magland/miniconda3/envs/000458_fi/lib/python3.12/site-packages/hdmf/utils.py:668: UserWarning: Ignoring cached namespace 'hdmf-common' version 1.5.1 because version 1.8.0 is already loaded.\n", | ||
" return func(args[0], **pargs)\n", | ||
"/home/magland/miniconda3/envs/000458_fi/lib/python3.12/site-packages/hdmf/utils.py:668: UserWarning: Ignoring cached namespace 'core' version 2.5.0 because version 2.7.0 is already loaded.\n", | ||
" return func(args[0], **pargs)\n", | ||
"/home/magland/miniconda3/envs/000458_fi/lib/python3.12/site-packages/hdmf/utils.py:668: UserWarning: Ignoring cached namespace 'hdmf-experimental' version 0.2.0 because version 0.5.0 is already loaded.\n", | ||
" return func(args[0], **pargs)\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Elapsed time for lindi: 7.52 s\n", | ||
"Elapsed time for fsspec: 141.01 s\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# This notebook compares the time taken to stream an NWB file from DANDI archive\n", | ||
"# using lindi and fsspec. On my laptop on my home WiFi network, lindi streaming\n", | ||
"# took 7.52 s and fsspec streaming took 141.01 s. This is expected to depend\n", | ||
"# heavily on the network properties.\n", | ||
"\n", | ||
"import time\n", | ||
"from pynwb import NWBHDF5IO\n", | ||
"from dandi.dandiapi import DandiAPIClient\n", | ||
"from fsspec import filesystem\n", | ||
"from h5py import File\n", | ||
"import lindi\n", | ||
"\n", | ||
"\n", | ||
"def stream_nwbfile(DANDISET_ID, file_path):\n", | ||
" '''Stream NWB file from DANDI archive.\n", | ||
"\n", | ||
" Parameters\n", | ||
" ----------\n", | ||
" DANDISET_ID : str\n", | ||
" Dandiset ID\n", | ||
" file_path : str\n", | ||
" Path to NWB file in DANDI archive\n", | ||
"\n", | ||
" Returns\n", | ||
" -------\n", | ||
" nwbfile : NWBFile\n", | ||
" NWB file\n", | ||
" io : NWBHDF5IO\n", | ||
" NWB IO object (for closing)\n", | ||
"\n", | ||
" Notes\n", | ||
" -----\n", | ||
" The io object must be closed after use.\n", | ||
" '''\n", | ||
" with DandiAPIClient() as client:\n", | ||
" asset = client.get_dandiset(DANDISET_ID, 'draft').get_asset_by_path(file_path)\n", | ||
" asset_url = asset.get_content_url(follow_redirects=0, strip_query=True)\n", | ||
" file = lindi.LindiH5pyFile.from_hdf5_file(asset_url)\n", | ||
" io = NWBHDF5IO(file=file, load_namespaces=True)\n", | ||
" nwbfile = io.read()\n", | ||
" return nwbfile, io\n", | ||
"\n", | ||
"\n", | ||
"def stream_nwbfile_fsspec(DANDISET_ID, file_path):\n", | ||
" '''Stream NWB file from DANDI archive.\n", | ||
"\n", | ||
" Parameters\n", | ||
" ----------\n", | ||
" DANDISET_ID : str\n", | ||
" Dandiset ID\n", | ||
" file_path : str\n", | ||
" Path to NWB file in DANDI archive\n", | ||
"\n", | ||
" Returns\n", | ||
" -------\n", | ||
" nwbfile : NWBFile\n", | ||
" NWB file\n", | ||
" io : NWBHDF5IO\n", | ||
" NWB IO object (for closing)\n", | ||
"\n", | ||
" Notes\n", | ||
" -----\n", | ||
" The io object must be closed after use.\n", | ||
" '''\n", | ||
" with DandiAPIClient() as client:\n", | ||
" asset = client.get_dandiset(DANDISET_ID, 'draft').get_asset_by_path(file_path)\n", | ||
" asset_url = asset.get_content_url(follow_redirects=0, strip_query=True)\n", | ||
" fs = filesystem(\"http\")\n", | ||
" file_system = fs.open(asset_url, \"rb\")\n", | ||
" file = File(file_system, mode=\"r\")\n", | ||
" io = NWBHDF5IO(file=file, load_namespaces=True)\n", | ||
" nwbfile = io.read()\n", | ||
" return nwbfile, io\n", | ||
"\n", | ||
"\n", | ||
"DANDISET_ID = \"000458\"\n", | ||
"file_path = \"sub-586468/sub-586468_ses-20210819_behavior+ecephys.nwb\"\n", | ||
"\n", | ||
"timer = time.time()\n", | ||
"nwbfile, io = stream_nwbfile(DANDISET_ID, file_path)\n", | ||
"print(f\"Elapsed time for lindi: {time.time() - timer:.2f} s\")\n", | ||
"\n", | ||
"timer = time.time()\n", | ||
"nwbfile, io = stream_nwbfile_fsspec(DANDISET_ID, file_path)\n", | ||
"print(f\"Elapsed time for fsspec: {time.time() - timer:.2f} s\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "000458_fi", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.