Skip to content

Commit

Permalink
Add support and documentation for export from HDMF version 2 (#1280)
Browse files Browse the repository at this point in the history
* Add roundtrip export testing

* Stash changes

* Fix test warning

* Remove logging

* Clean up roundtrip mixin

* Use HDMF 2.1.0

* First pass at adding export and export docs

* Update tutorial

* Add tutorial on export

* Fix rst formatting

* Change docs file name and add section on object IDs and HDMF export

* Fix generate new ID text

* Update changelog
  • Loading branch information
rly authored Aug 13, 2020
1 parent 3d00833 commit 4eb8468
Show file tree
Hide file tree
Showing 5 changed files with 297 additions and 6 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# PyNWB Changelog

## PyNWB 1.4.0 (August 11, 2020)
## PyNWB 1.4.0 (August 12, 2020)

Users can now add/remove containers from a written NWB file and export the modified NWBFile to a new file path.
@rly (#1280)
- See https://pynwb.readthedocs.io/en/stable/tutorials/general/add_remove_containers.html for examples and more
information.

### Internal improvements:
- Update requirements to use HDMF 2.1.0. @rly (#1256)
Expand Down
215 changes: 215 additions & 0 deletions docs/gallery/general/add_remove_containers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""
.. _modifying_data:

Adding/removing containers from an NWB file
============================================

This tutorial explains how to add and remove containers from an existing NWB file and either write the data back to the
same file or export the data to a new file.
"""

###############################################################################
# Adding objects to an NWB file in read/write mode
# ----------------------------------------------------
# PyNWB supports adding container objects to an existing NWB file - that is, reading data from an NWB file, adding a
# container object, such as a new :py:class:`~pynwb.base.TimeSeries` object, and writing the modified
# :py:class:`~pynwb.file.NWBFile` back to the same file path on disk. To do so:
#
# 1. open the file with an :py:class:`~pynwb.NWBHDF5IO` object in read/write mode (``mode='r+'`` or ``mode='a'``)
# 2. read the :py:class:`~pynwb.file.NWBFile`
# 3. add container objects to the :py:class:`~pynwb.file.NWBFile` object
# 4. write the modified :py:class:`~pynwb.file.NWBFile` using the same :py:class:`~pynwb.NWBHDF5IO` object
#
# For example:

from pynwb import NWBFile, NWBHDF5IO, TimeSeries
import datetime
import numpy as np

# first, write a test NWB file
nwbfile = NWBFile(
    session_description='demonstrate adding to an NWB file',
    identifier='NWB123',
    session_start_time=datetime.datetime.now(datetime.timezone.utc),
)

filename = 'nwbfile.nwb'
with NWBHDF5IO(filename, 'w') as io:
    io.write(nwbfile)

# open the NWB file in r+ mode
with NWBHDF5IO(filename, 'r+') as io:
    read_nwbfile = io.read()

    # create a TimeSeries and add it to the file under the acquisition group
    data = list(range(100, 200, 10))
    # use np.float64 explicitly: the np.float alias was deprecated in NumPy 1.20 and removed in NumPy 1.24
    timestamps = np.arange(10, dtype=np.float64)
    test_ts = TimeSeries(
        name='test_timeseries',
        data=data,
        unit='m',
        timestamps=timestamps
    )
    read_nwbfile.add_acquisition(test_ts)

    # write the modified NWB file using the same IO object, while it is still open
    io.write(read_nwbfile)

# confirm the file contains the new TimeSeries in acquisition
with NWBHDF5IO(filename, 'r') as io:
    read_nwbfile = io.read()
    print(read_nwbfile)

###############################################################################
# .. note::
#
#   You cannot remove objects from an NWB file using the above method.

###############################################################################
# Modifying an NWB file in this way has limitations. The destination file path must be the same as the source
# file path, and it is not possible to remove objects from an NWB file. You can use the
# :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method, detailed below, to modify an NWB file in these ways.
#
# .. warning::
#
#   NWB datasets that have been written to disk are read as :py:class:`h5py.Dataset <h5py.Dataset>` objects.
#   Directly modifying the data in these :py:class:`h5py.Dataset <h5py.Dataset>` objects immediately
#   modifies the data on disk
#   (the :py:meth:`NWBHDF5IO.write <pynwb.NWBHDF5IO.write>` method does not need to be called and the
#   :py:class:`~pynwb.NWBHDF5IO` instance does not need to be closed). Directly modifying datasets in this way
#   can lead to files that do not validate or cannot be opened, so take caution when using this method.
#   Note: only chunked datasets or datasets with ``maxshape`` set can be resized.
#   See the `h5py chunked storage documentation <https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage>`_
#   for more details.

###############################################################################
# .. note::
#
#   It is not possible to modify the attributes (fields) of an NWB container in memory.

###############################################################################
# Exporting a written NWB file to a new file path
# ---------------------------------------------------
# Use the :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method to read data from an existing NWB file,
# modify the data, and write the modified data to a new file path. Modifications to the data can be additions or
# removals of objects, such as :py:class:`~pynwb.base.TimeSeries` objects. This is especially useful if you
# have raw data and processed data in the same NWB file and you want to create a new NWB file with all of the
# contents of the original file except for the raw data for sharing with collaborators.
#
# To remove existing containers, use the :py:meth:`~hdmf.utils.LabelledDict.pop` method on any
# :py:class:`~hdmf.utils.LabelledDict` object, such as ``NWBFile.acquisition``, ``NWBFile.processing``,
# ``NWBFile.analysis``, ``NWBFile.scratch``, ``NWBFile.devices``, ``NWBFile.stimulus``,
# ``NWBFile.stimulus_template``, ``NWBFile.electrode_groups``, ``NWBFile.imaging_planes``,
# ``NWBFile.icephys_electrodes``, ``NWBFile.ogen_sites``, ``NWBFile.lab_meta_data``,
# and :py:class:`~pynwb.base.ProcessingModule` objects.
#
# For example:

# first, create a test NWB file with a TimeSeries in the acquisition group
nwbfile = NWBFile(
    session_description='demonstrate export of an NWB file',
    identifier='NWB123',
    session_start_time=datetime.datetime.now(datetime.timezone.utc),
)
data1 = list(range(100, 200, 10))
# use np.float64 explicitly: the np.float alias was deprecated in NumPy 1.20 and removed in NumPy 1.24
timestamps1 = np.arange(10, dtype=np.float64)
test_ts1 = TimeSeries(
    name='test_timeseries1',
    data=data1,
    unit='m',
    timestamps=timestamps1
)
nwbfile.add_acquisition(test_ts1)

# then, create a processing module for processed behavioral data
nwbfile.create_processing_module(
    name='behavior',
    description='processed behavioral data'
)
data2 = list(range(100, 200, 10))
timestamps2 = np.arange(10, dtype=np.float64)
test_ts2 = TimeSeries(
    name='test_timeseries2',
    data=data2,
    unit='m',
    timestamps=timestamps2
)
nwbfile.processing['behavior'].add(test_ts2)

# write these objects to an NWB file
filename = 'nwbfile.nwb'
with NWBHDF5IO(filename, 'w') as io:
    io.write(nwbfile)

# read the written file
export_filename = 'exported_nwbfile.nwb'
with NWBHDF5IO(filename, mode='r') as read_io:
    read_nwbfile = read_io.read()

    # add a new TimeSeries to the behavior processing module
    data3 = list(range(100, 200, 10))
    timestamps3 = np.arange(10, dtype=np.float64)
    test_ts3 = TimeSeries(
        name='test_timeseries3',
        data=data3,
        unit='m',
        timestamps=timestamps3
    )
    read_nwbfile.processing['behavior'].add(test_ts3)

    # use the pop method to remove the original TimeSeries from the acquisition group
    read_nwbfile.acquisition.pop('test_timeseries1')

    # use the pop method to remove a TimeSeries from a processing module
    read_nwbfile.processing['behavior'].data_interfaces.pop('test_timeseries2')

    # call the export method to write the modified NWBFile instance to a new file path
    # the original file is not modified
    # read_io is passed as src_io, so the export is done while read_io is still open
    with NWBHDF5IO(export_filename, mode='w') as export_io:
        export_io.export(src_io=read_io, nwbfile=read_nwbfile)

# confirm the exported file does not contain TimeSeries with names 'test_timeseries1' or 'test_timeseries2'
# but does contain a new TimeSeries in processing['behavior'] with name 'test_timeseries3'
with NWBHDF5IO(export_filename, 'r') as io:
    read_nwbfile = io.read()
    print(read_nwbfile)
    print(read_nwbfile.processing['behavior'])

###############################################################################
# .. note::
#
#   :py:class:`~pynwb.epoch.TimeIntervals` objects, such as ``NWBFile.epochs``, ``NWBFile.trials``,
#   ``NWBFile.invalid_times``, and custom :py:class:`~pynwb.epoch.TimeIntervals` objects cannot be
#   removed (popped) from ``NWBFile.intervals``.

###############################################################################
# .. warning::
#
#   Removing an object from an NWBFile may break links and references within the file and across files.
#   This is analogous to having shortcuts/aliases to a file on your filesystem and then deleting the file.
#   Extra caution should be taken when removing heavily referenced items such as
#   :py:class:`~pynwb.device.Device` objects,
#   :py:class:`~pynwb.ecephys.ElectrodeGroup` objects, the electrodes table, and the
#   :py:class:`~pynwb.ophys.PlaneSegmentation` table.

###############################################################################
# Exporting with new object IDs
# ---------------------------------
# When exporting a read NWB file to a new file path, the object IDs within the original NWB file will be copied to the
# new file. To make the exported NWB file contain a new set of object IDs, call
# :py:meth:`~hdmf.container.AbstractContainer.generate_new_id` on your :py:class:`~pynwb.file.NWBFile` object.
# This will generate a new object ID for the :py:class:`~pynwb.file.NWBFile` object and all of the objects within
# the NWB file.

# read the previously written file, regenerate its object IDs in memory, and export it to a new path
export_filename = 'exported_nwbfile.nwb'
with NWBHDF5IO(filename, mode='r') as read_io:
    read_nwbfile = read_io.read()
    # assign new object IDs before exporting so the exported file does not reuse the original IDs
    read_nwbfile.generate_new_id()

    # read_io is passed as src_io, so the export is done while read_io is still open
    with NWBHDF5IO(export_filename, mode='w') as export_io:
        export_io.export(src_io=read_io, nwbfile=read_nwbfile)

###############################################################################
# More information about export
# ---------------------------------
# For more information about the export functionality, see https://hdmf.readthedocs.io/en/latest/export.html
11 changes: 11 additions & 0 deletions src/pynwb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,17 @@ def __init__(self, **kwargs):
manager = get_manager()
super(NWBHDF5IO, self).__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm)

@docval({'name': 'src_io', 'type': HDMFIO, 'doc': 'the HDMFIO object for reading the data to export'},
        {'name': 'nwbfile', 'type': 'NWBFile',
         'doc': 'the NWBFile object to export. If None, then the entire contents of src_io will be exported',
         'default': None},
        {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`write_builder`',
         'default': dict()})
def export(self, **kwargs):
    """
    Export the data read by ``src_io``, or the given NWBFile, using this IO object.

    This is a thin wrapper around the parent class's ``export`` method: the ``nwbfile`` argument is
    forwarded to the parent under the name ``container``; all other arguments are passed through.
    """
    # rename the 'nwbfile' argument to 'container' before delegating to the parent implementation
    kwargs['container'] = popargs('nwbfile', kwargs)
    call_docval_func(super().export, kwargs)


from . import io as __io # noqa: F401,E402
from .core import NWBContainer, NWBData # noqa: F401,E402
Expand Down
68 changes: 64 additions & 4 deletions src/pynwb/testing/testh5io.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,20 @@ def setUp(self):
self.create_date = datetime(2018, 4, 15, 12, tzinfo=tzlocal())
self.container_type = self.container.__class__.__name__
self.filename = 'test_%s.nwb' % self.container_type
self.export_filename = 'test_export_%s.nwb' % self.container_type
self.writer = None
self.reader = None
self.export_reader = None

def tearDown(self):
    """ Close any IO objects that are still set and remove the written and exported test files """
    # close in the same order as they are created: writer, reader, then the reader of the exported file
    for open_io in (self.writer, self.reader, self.export_reader):
        if open_io is not None:
            open_io.close()
    for path in (self.filename, self.export_filename):
        remove_test_file(path)

@abstractmethod
def setUpContainer(self):
Expand All @@ -62,9 +67,23 @@ def test_roundtrip(self):
self.assertIsNotNone(str(self.read_container))
# make sure we get a completely new object
self.assertNotEqual(id(self.container), id(self.read_container))
# make sure the object ID is preserved
self.assertIs(self.read_nwbfile.objects[self.container.object_id], self.read_container)
self.assertContainerEqual(self.read_container, self.container)

def test_roundtrip_export(self):
    """
    Roundtrip the test Container through write, read, and export, and check that the Container read from the
    exported file has the same contents as the original test Container
    """
    exported_container = self.roundtripExportContainer()
    self.read_container = exported_container
    # converting to a string must work (guards against errors when printing the container)
    self.assertIsNotNone(str(exported_container))
    # the container read from the exported file must be a different object than the original
    self.assertNotEqual(id(self.container), id(exported_container))
    # the original object ID must resolve to the read container in the exported file
    original_object_id = self.container.object_id
    self.assertIs(self.read_exported_nwbfile.objects[original_object_id], exported_container)
    self.assertContainerEqual(exported_container, self.container, ignore_hdmf_attrs=True)

def roundtripContainer(self, cache_spec=False):
"""
Add the test Container to an NWBFile, write it to file, read the file, and return the test Container from the
Expand All @@ -76,9 +95,8 @@ def roundtripContainer(self, cache_spec=False):
self.addContainer(nwbfile)

with warnings.catch_warnings(record=True) as ws:
self.writer = NWBHDF5IO(self.filename, mode='w')
self.writer.write(nwbfile, cache_spec=cache_spec)
self.writer.close()
with NWBHDF5IO(self.filename, mode='w') as write_io:
write_io.write(nwbfile, cache_spec=cache_spec)

self.validate()

Expand All @@ -101,6 +119,41 @@ def roundtripContainer(self, cache_spec=False):
self.reader = None
raise e

def roundtripExportContainer(self, cache_spec=False):
    """
    Roundtrip the test Container through a written file, export the read file to a second file, and return
    the test Container as read from the exported file

    Warnings recorded during export, validation, and reading of the exported file are inspected afterwards:
    MissingRequiredWarning, OrphanContainerWarning, and BrokenLinkWarning are raised as Exceptions, and all
    other warnings are re-issued.
    """
    self.roundtripContainer(cache_spec=cache_spec)  # self.read_nwbfile is now set

    # NOTE(review): the block nesting below is reconstructed from the statement grouping - confirm that the
    # warning handling and the final getContainer call belong outside the catch_warnings block
    with warnings.catch_warnings(record=True) as recorded:
        NWBHDF5IO.export_io(src_io=self.reader, path=self.export_filename, cache_spec=cache_spec)

        self.validate()

        self.export_reader = NWBHDF5IO(self.export_filename, mode='r')
        self.read_exported_nwbfile = self.export_reader.read()

    for caught in recorded:
        is_fatal = issubclass(caught.category, (MissingRequiredWarning,
                                                OrphanContainerWarning,
                                                BrokenLinkWarning))
        if is_fatal:
            raise Exception('%s: %s' % (caught.category.__name__, caught.message))
        warnings.warn(caught.message, caught.category)

    try:
        return self.getContainer(self.read_exported_nwbfile)
    except Exception as err:
        # release the exported file before propagating the error
        self.export_reader.close()
        self.export_reader = None
        raise err

@abstractmethod
def addContainer(self, nwbfile):
""" Should add the test Container to the given NWBFile """
Expand All @@ -112,14 +165,21 @@ def getContainer(self, nwbfile):
raise NotImplementedError('Cannot run test unless getContainer is implemented')

def validate(self):
    """ Validate the created files

    Runs the validator on the written file (self.filename) and on the exported file (self.export_filename),
    skipping any file that does not exist on disk. Raises an Exception for the first validation error reported
    for a file.
    """
    for path in (self.filename, self.export_filename):
        if not os.path.exists(path):
            continue
        # open the same path that was checked for existence, so that the exported file itself is
        # validated rather than the originally written file a second time
        with NWBHDF5IO(path, mode='r') as io:
            errors = pynwb_validate(io)
            if errors:
                # only the first error is surfaced, because raising ends the iteration
                for err in errors:
                    raise Exception(err)


class AcquisitionH5IOMixin(NWBH5IOMixin):
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/hdf5/test_ophys.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def setUpContainer(self):
indicator='GFP',
location='somewhere in the brain',
reference_frame='unknown',
origin_coords=[10, 20],
origin_coords=[10., 20.],
origin_coords_unit='millimeters',
grid_spacing=[0.001, 0.001],
grid_spacing_unit='millimeters',
Expand Down

0 comments on commit 4eb8468

Please sign in to comment.