From 8e20a1c670ba5fe8d972e15abdb378baa14839cb Mon Sep 17 00:00:00 2001 From: damienkelly Date: Fri, 11 May 2018 15:22:51 -0700 Subject: [PATCH] =?UTF-8?q?spatialmedia:=20Define=20mpeg=20tags=20as=20byt?= =?UTF-8?q?es=20objects.=20This=20maintains=20Pytho=E2=80=A6=20(#196)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * spatialmedia: Define mpeg tags as bytes objects. This maintains Python3 compatibility and fixes the handling of files with non-ASCII atom types e.g. (User data atoms like '(c)swr'). Also, add Python3 support for spatial audio metadata injection. * Modifies the spatial audio specification to enable the inclusion of head-locked (non-diegetic) stereo. --- docs/spatial-audio-rfc.md | 63 ++++------------------- spatialmedia/__main__.py | 18 ++++++- spatialmedia/gui.py | 29 +++++++---- spatialmedia/metadata_utils.py | 74 ++++++++++++++++++++-------- spatialmedia/mpeg/box.py | 6 +-- spatialmedia/mpeg/constants.py | 67 +++++++++++++------------ spatialmedia/mpeg/container.py | 8 +-- spatialmedia/mpeg/mpeg4_container.py | 8 +-- spatialmedia/mpeg/sa3d.py | 24 ++++++--- 9 files changed, 160 insertions(+), 137 deletions(-) diff --git a/docs/spatial-audio-rfc.md b/docs/spatial-audio-rfc.md index 1f79dfa..bed8d55 100644 --- a/docs/spatial-audio-rfc.md +++ b/docs/spatial-audio-rfc.md @@ -1,12 +1,12 @@ # Spatial Audio RFC (draft) -*This document describes an open metadata scheme by which MP4 multimedia containers may accommodate spatial and non-diegetic audio. Comments are welcome on the [spatial-media-discuss](https://groups.google.com/forum/#!forum/spatial-media-discuss) mailing list or by [filing an issue](https://github.com/google/spatial-media/issues) on GitHub.* +*This document describes an open metadata scheme by which MP4 multimedia containers may accommodate spatial and head-locked stereo audio. Comments are welcome on the [spatial-media-discuss](https://groups.google.com/forum/#!forum/spatial-media-discuss) mailing list or by [filing an issue](https://github.com/google/spatial-media/issues) on GitHub.* ------------------------------------------------------ ## Metadata Format ### MP4 -Spatial audio metadata is stored in a new box, `SA3D`, defined in this RFC. Non-diegetic audio metadata is stored in a new box, `SAND`, defined in this RFC. The metadata is applicable to individual tracks in an MP4 container. +Spatial audio metadata is stored in a new box, `SA3D`, defined in this RFC. #### Spatial Audio Box (SA3D) ##### Definition @@ -35,7 +35,9 @@ aligned(8) class SpatialAudioBox extends Box(‘SA3D’) { ##### Semantics - `version` is an 8-bit unsigned integer that specifies the version of this box. Must be set to `0`. -- `ambisonic_type` is an 8-bit unsigned integer that specifies the type of ambisonic audio represented; the following values are defined: +- `head_locked_stereo` is a 1-bit flag used to indicate that the stored audio track contains head-locked stereo audio in addition to ambisonics audio. The flag should be set if the track contains head-locked stereo and unset otherwise. + +- `ambisonic_type` is a 7-bit unsigned integer that specifies the type of ambisonic audio represented; the following values are defined: | `ambisonic_type` | Ambisonic Type Description | |:-----------------|:---------------------------| @@ -59,9 +61,11 @@ aligned(8) class SpatialAudioBox extends Box(‘SA3D’) { - `channel_map` is a sequence of 32-bit unsigned integers that maps audio channels in a given audio track to ambisonic components, given the defined `ambisonic_channel_ordering`. The sequence of `channel_map` values should match the channel sequence within the given audio track. - For example, consider a 4-channel audio track containing ambisonic components *W*, *X*, *Y*, *Z* at channel indexes 0, 1, 2, 3, respectively. For `ambisonic_channel_ordering = 0` (ACN), the ordering of components should be *W*, *Y*, *Z*, *X*, so the `channel_map` sequence should be `0`, `2`, `3`, `1`. + For the example case of `ambisonic_type = 0` (Periphonic), consider a 4-channel audio track containing ambisonic components *W*, *X*, *Y*, *Z* at channel indexes `0`, `1`, `2`, `3`, respectively. For `ambisonic_channel_ordering = 0` (ACN), the ordering of components should be *W*, *Y*, *Z*, *X*, so the `channel_map` sequence should be `0`, `2`, `3`, `1`. + + As a simpler example, for a 4-channel audio track containing ambisonic components *W*, *Y*, *Z*, *X* at channel indexes `0`, `1`, `2`, `3`, respectively, the `channel_map` sequence should be specified as `0`, `1`, `2`, `3` when `ambisonic_channel_ordering = 0` (ACN). - As a simpler example, for a 4-channel audio track containing ambisonic components *W*, *Y*, *Z*, *X* at channel indexes 0, 1, 2, 3, respectively, the `channel_map` sequence should be specified as `0`, `1`, `2`, `3` when `ambisonic_channel_ordering = 0` (ACN). + For the example case of `ambisonic_type = 0` (Periphonic) with `head_locked_stereo = 1`, the stored audio will consist of `4` ambisonic components *W*, *Y*, *Z*, *X* in addition to head-locked stereo components *L* and *R*. In this case, the SA3D atom will define `num_channels = 6` and a `channel_map` specified as `0`, `1`, `2`, `3`, `4`, `5` indicating that the channels are laid out in the file as *W*, *Y*, *Z*, *X*, *L*, *R*. This representation extends to different layouts of ambisonics and head-locked stereo components. For example, a channel layout of `4`, `5`, `0`, `1`, `2`, `3` indicates that the layout of the stored audio is *L*, *R*, *W*, *Y*, *Z*, *X*. ##### Example @@ -94,55 +98,6 @@ where the `SA3D` box has the following data: ------------------------------------------------------ -#### Non-Diegetic Audio Box (SAND) -##### Definition -Box Type: `SAND` -Container: Sound Sample Description box (e.g., `mp4a`, `lpcm`, `sowt`, etc.) -Mandatory: No -Quantity: Zero or one - -When present, provides additional information about the non-diegetic audio content contained in this audio track. This can be used alongisde `SA3D` in a head-tracked virtual reality experience to provide audio which should remain unchanged by listener head rotation; e.g., narration or stereo music. - -##### Syntax -``` -aligned(8) class NonDiegeticAudioBox extends Box(‘SAND’) { - unsigned int(8) version; -} -``` - -##### Semantics -- `version` is an 8-bit unsigned integer that specifies the version of this box. Must be set to `0`. - -##### Example - -Here is an example MP4 box hierarchy for a file containing the `SA3D` and `SAND` boxes, to mix spatial audio with non-diegetic audio: - -- moov - - trak - - mdia - - minf - - stbl - - stsd - - mp4a - - esds - - SA3D - - trak - - mdia - - minf - - stbl - - stsd - - mp4a - - esds - - SAND - -where the `SAND` box has the following data: - -| Field Name | Value | -|:-----------|:-----| -| `version` | `0` | - ------------------------------------------------------- - ## Appendix 1 - Ambisonics The traditional notion of ambisonics is used, where the sound field is represented by spherical harmonics coefficients using the *associated Legendre polynomials* (without *Condon-Shortley phase*) as the basis functions. Thus, the spherical harmonic of degree `l` and order `m` at elevation `E` and azimuth `A` is given by: diff --git a/spatialmedia/__main__.py b/spatialmedia/__main__.py index 6559d75..e0a8349 100755 --- a/spatialmedia/__main__.py +++ b/spatialmedia/__main__.py @@ -89,7 +89,18 @@ def main(): args.crop) if args.spatial_audio: - metadata.audio = metadata_utils.SPATIAL_AUDIO_DEFAULT_METADATA + parsed_metadata = metadata_utils.parse_metadata(args.file[0], console) + if not metadata.audio: + spatial_audio_description = metadata_utils.get_spatial_audio_description( + parsed_metadata.num_audio_channels) + if spatial_audio_description.is_supported: + metadata.audio = metadata_utils.get_spatial_audio_metadata( + spatial_audio_description.order, + spatial_audio_description.has_head_locked_stereo) + else: + console("Audio has %d channel(s) and is not a supported " + "spatial audio format." % (parsed_metadata.num_audio_channels)) + return if metadata.video: metadata_utils.inject_metadata(args.file[0], args.file[1], metadata, @@ -100,6 +111,11 @@ def main(): if len(args.file) > 0: for input_file in args.file: + if args.spatial_audio: + parsed_metadata = metadata_utils.parse_metadata(input_file, console) + metadata.audio = metadata_utils.get_spatial_audio_description( + parsed_metadata.num_channels) + metadata_utils.parse_metadata(input_file, console) return diff --git a/spatialmedia/gui.py b/spatialmedia/gui.py index b0d5a96..96eee80 100755 --- a/spatialmedia/gui.py +++ b/spatialmedia/gui.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Spatial Media Metadata Injector GUI +"""Spatial Media Metadata Injector GUI GUI application for examining/injecting spatial media metadata in MP4/MOV files. """ @@ -36,8 +36,10 @@ path = os.path.dirname(sys.modules[__name__].__file__) path = os.path.join(path, '..') sys.path.insert(0, path) -from spatialmedia import metadata_utils +from spatialmedia import metadata_utils +SPATIAL_AUDIO_LABEL = "My video has spatial audio (ambiX ACN/SN3D format)" +HEAD_LOCKED_STEREO_LABEL = "with head-locked stereo" class Console(object): def __init__(self): @@ -85,7 +87,8 @@ def action_open(self): file_extension = os.path.splitext(infile)[1].lower() self.var_spherical.set(1) - self.enable_spatial_audio = parsed_metadata.num_audio_channels == 4 + self.spatial_audio_description = metadata_utils.get_spatial_audio_description( + parsed_metadata.num_audio_channels) if not metadata: self.var_3d.set(0) @@ -108,7 +111,7 @@ def action_open(self): if audio_metadata: self.var_spatial_audio.set(1) - print audio_metadata.get_metadata_string() + print(audio_metadata.get_metadata_string()) self.update_state() @@ -121,7 +124,9 @@ def action_inject_delay(self): metadata.video = metadata_utils.generate_spherical_xml(stereo=stereo) if self.var_spatial_audio.get(): - metadata.audio = metadata_utils.SPATIAL_AUDIO_DEFAULT_METADATA + metadata.audio = metadata_utils.get_spatial_audio_metadata( + self.spatial_audio_description.order, + self.spatial_audio_description.has_head_locked_stereo) console = Console() metadata_utils.inject_metadata( @@ -172,12 +177,18 @@ def update_state(self): if self.var_spherical.get(): self.checkbox_3D.configure(state="normal") self.button_inject.configure(state="normal") - if self.enable_spatial_audio: + if self.spatial_audio_description.is_supported: self.checkbox_spatial_audio.configure(state="normal") else: self.checkbox_3D.configure(state="disabled") self.button_inject.configure(state="disabled") self.checkbox_spatial_audio.configure(state="disabled") + if self.spatial_audio_description.has_head_locked_stereo: + self.label_spatial_audio.configure( + text='{}\n{}'.format( + SPATIAL_AUDIO_LABEL, HEAD_LOCKED_STEREO_LABEL)) + else: + self.label_spatial_audio.configure(text=SPATIAL_AUDIO_LABEL) def set_error(self, text): self.label_message["text"] = text @@ -234,8 +245,8 @@ def create_widgets(self): # Spatial Audio Checkbox row += 1 column = 0 - self.label_spatial_audio = Label(self, anchor=W) - self.label_spatial_audio["text"] = "My video has spatial audio (ambiX ACN/SN3D format)" + self.label_spatial_audio = Label(self, anchor=W, justify=LEFT) + self.label_spatial_audio["text"] = SPATIAL_AUDIO_LABEL self.label_spatial_audio.grid(row=row, column=column, padx=PAD_X, pady=7, sticky=W) column += 1 @@ -287,7 +298,7 @@ def __init__(self, master=None): master.attributes("-topmost", True) master.focus_force() self.after(50, lambda: master.attributes("-topmost", False)) - self.enable_spatial_audio = False + self.spatial_audio_description = None def report_callback_exception(self, *args): exception = traceback.format_exception(*args) diff --git a/spatialmedia/metadata_utils.py b/spatialmedia/metadata_utils.py index 2ea7dc1..71c11ca 100755 --- a/spatialmedia/metadata_utils.py +++ b/spatialmedia/metadata_utils.py @@ -17,6 +17,7 @@ """Utilities for examining/injecting spatial media metadata in MP4/MOV files.""" +import collections import os import re import struct @@ -85,14 +86,6 @@ "CroppedAreaTopPixels", ] -SPATIAL_AUDIO_DEFAULT_METADATA = { - "ambisonic_order": 1, - "ambisonic_type": "periphonic", - "ambisonic_channel_ordering": "ACN", - "ambisonic_normalization": "SN3D", - "channel_map": [0, 1, 2, 3], -} - class Metadata(object): def __init__(self): self.video = None @@ -112,6 +105,23 @@ def __init__(self): integer_regex_group = "(\d+)" crop_regex = "^{0}$".format(":".join([integer_regex_group] * 6)) +MAX_SUPPORTED_AMBIX_ORDER = 1 + +SpatialAudioDescription = collections.namedtuple( + 'SpatialAudioDescription', + 'order is_supported has_head_locked_stereo') + +def get_spatial_audio_description(num_channels): + for i in range(1, MAX_SUPPORTED_AMBIX_ORDER+1): + if (i + 1)*(i + 1) == num_channels: + return SpatialAudioDescription( + order=i, is_supported=True, has_head_locked_stereo=False) + elif ((i + 1)*(i + 1) + 2) == num_channels: + return SpatialAudioDescription( + order=i, is_supported=True, has_head_locked_stereo=True) + + return SpatialAudioDescription( + order=-1, is_supported=False, has_head_locked_stereo=True) def spherical_uuid(metadata): """Constructs a uuid containing spherical metadata. @@ -154,7 +164,7 @@ def mpeg4_add_spherical(mpeg4_file, in_fh, metadata): continue position = mdia_sub_element.content_start() + 8 in_fh.seek(position) - if in_fh.read(4).decode() == mpeg.constants.TRAK_TYPE_VIDE: + if in_fh.read(4) == mpeg.constants.TRAK_TYPE_VIDE: added = True break @@ -174,7 +184,7 @@ def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console): mpeg4_file: mpeg4, Mpeg4 file structure to add metadata. in_fh: file handle, Source for uncached file contents. audio_metadata: dictionary ('ambisonic_type': string, - 'ambisonic_order': int), + 'ambisonic_order': int, 'head_locked_stereo': Bool), Supports 'periphonic' ambisonic type only. """ for element in mpeg4_file.moov_box.contents: @@ -187,7 +197,7 @@ def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console): continue position = mdia_sub_element.content_start() + 8 in_fh.seek(position) - if in_fh.read(4).decode() == mpeg.constants.TAG_SOUN: + if in_fh.read(4) == mpeg.constants.TAG_SOUN: return inject_spatial_audio_atom( in_fh, sub_element, audio_metadata, console) return True @@ -218,18 +228,22 @@ def inject_spatial_audio_atom( sample_description.header_size + 16) num_channels = get_num_audio_channels( sub_element, in_fh) - num_ambisonic_components = \ - get_expected_num_audio_components( + expected_num_channels = \ + get_expected_num_audio_channels( audio_metadata["ambisonic_type"], - audio_metadata["ambisonic_order"]) - if num_channels != num_ambisonic_components: + audio_metadata["ambisonic_order"], + audio_metadata["head_locked_stereo"]) + if num_channels != expected_num_channels: + head_locked_stereo_msg = (" with head-locked stereo" if + audio_metadata["head_locked_stereo"] else "") err_msg = "Error: Found %d audio channel(s). "\ "Expected %d channel(s) for %s ambisonics "\ - "of order %d."\ + "of order %d%s."\ % (num_channels, - num_ambisonic_components, + expected_num_channels, audio_metadata["ambisonic_type"], - audio_metadata["ambisonic_order"]) + audio_metadata["ambisonic_order"], + head_locked_stereo_msg) console(err_msg) return False sa3d_atom = mpeg.SA3DBox.create( @@ -505,12 +519,15 @@ def get_descriptor_length(in_fh): return descriptor_length -def get_expected_num_audio_components(ambisonics_type, ambisonics_order): +def get_expected_num_audio_channels( + ambisonics_type, ambisonics_order, head_locked_stereo): """ Returns the expected number of ambisonic components for a given ambisonic type and ambisonic order. """ + head_locked_stereo_channels = 2 if head_locked_stereo == True else 0 if (ambisonics_type == 'periphonic'): - return ((ambisonics_order + 1) * (ambisonics_order + 1)) + return (((ambisonics_order + 1) * (ambisonics_order + 1)) + + head_locked_stereo_channels) else: return -1 @@ -631,3 +648,20 @@ def get_num_audio_tracks(mpeg4_file, in_fh): if (in_fh.read(4) == mpeg.constants.TAG_SOUN): num_audio_tracks += 1 return num_audio_tracks + + +def get_spatial_audio_metadata(ambisonic_order, head_locked_stereo): + num_channels = get_expected_num_audio_channels( + "periphonic", ambisonic_order, head_locked_stereo) + metadata = { + "ambisonic_order": 0, + "head_locked_stereo": False, + "ambisonic_type": "periphonic", + "ambisonic_channel_ordering": "ACN", + "ambisonic_normalization": "SN3D", + "channel_map": [], + } + metadata['ambisonic_order'] = ambisonic_order + metadata['head_locked_stereo'] = head_locked_stereo + metadata['channel_map'] = range(0, num_channels) + return metadata diff --git a/spatialmedia/mpeg/box.py b/spatialmedia/mpeg/box.py index dd02d1b..0ebacf8 100755 --- a/spatialmedia/mpeg/box.py +++ b/spatialmedia/mpeg/box.py @@ -41,7 +41,7 @@ def load(fh, position, end): fh.seek(position) header_size = 8 size = struct.unpack(">I", fh.read(4))[0] - name = fh.read(4).decode() + name = fh.read(4) if size == 1: size = struct.unpack(">Q", fh.read(8))[0] @@ -88,11 +88,11 @@ def save(self, in_fh, out_fh, delta): """ if self.header_size == 16: out_fh.write(struct.pack(">I", 1)) - out_fh.write(self.name.encode()) + out_fh.write(self.name) out_fh.write(struct.pack(">Q", self.size())) elif self.header_size == 8: out_fh.write(struct.pack(">I", self.size())) - out_fh.write(self.name.encode()) + out_fh.write(self.name) if self.content_start(): in_fh.seek(self.content_start()) diff --git a/spatialmedia/mpeg/constants.py b/spatialmedia/mpeg/constants.py index 7313811..eb61562 100755 --- a/spatialmedia/mpeg/constants.py +++ b/spatialmedia/mpeg/constants.py @@ -17,45 +17,45 @@ """MPEG-4 constants.""" -TRAK_TYPE_VIDE = "vide" +TRAK_TYPE_VIDE = b"vide" # Leaf types. -TAG_STCO = "stco" -TAG_CO64 = "co64" -TAG_FREE = "free" -TAG_MDAT = "mdat" -TAG_XML = "xml " -TAG_HDLR = "hdlr" -TAG_FTYP = "ftyp" -TAG_ESDS = "esds" -TAG_SOUN = "soun" -TAG_SA3D = "SA3D" +TAG_STCO = b"stco" +TAG_CO64 = b"co64" +TAG_FREE = b"free" +TAG_MDAT = b"mdat" +TAG_XML = b"xml " +TAG_HDLR = b"hdlr" +TAG_FTYP = b"ftyp" +TAG_ESDS = b"esds" +TAG_SOUN = b"soun" +TAG_SA3D = b"SA3D" # Container types. -TAG_MOOV = "moov" -TAG_UDTA = "udta" -TAG_META = "meta" -TAG_TRAK = "trak" -TAG_MDIA = "mdia" -TAG_MINF = "minf" -TAG_STBL = "stbl" -TAG_STSD = "stsd" -TAG_UUID = "uuid" -TAG_WAVE = "wave" +TAG_MOOV = b"moov" +TAG_UDTA = b"udta" +TAG_META = b"meta" +TAG_TRAK = b"trak" +TAG_MDIA = b"mdia" +TAG_MINF = b"minf" +TAG_STBL = b"stbl" +TAG_STSD = b"stsd" +TAG_UUID = b"uuid" +TAG_WAVE = b"wave" # Sound sample descriptions. -TAG_NONE = "NONE" -TAG_RAW_ = "raw " -TAG_TWOS = "twos" -TAG_SOWT = "sowt" -TAG_FL32 = "fl32" -TAG_FL64 = "fl64" -TAG_IN24 = "in24" -TAG_IN32 = "in32" -TAG_ULAW = "ulaw" -TAG_ALAW = "alaw" -TAG_LPCM = "lpcm" -TAG_MP4A = "mp4a" +TAG_NONE = b"NONE" +TAG_RAW_ = b"raw " +TAG_TWOS = b"twos" +TAG_SOWT = b"sowt" +TAG_FL32 = b"fl32" +TAG_FL64 = b"fl64" +TAG_IN24 = b"in24" +TAG_IN32 = b"in32" +TAG_ULAW = b"ulaw" +TAG_ALAW = b"alaw" +TAG_LPCM = b"lpcm" +TAG_MP4A = b"mp4a" SOUND_SAMPLE_DESCRIPTIONS = frozenset([ TAG_NONE, @@ -82,4 +82,3 @@ TAG_UDTA, TAG_WAVE, ]).union(SOUND_SAMPLE_DESCRIPTIONS) - diff --git a/spatialmedia/mpeg/container.py b/spatialmedia/mpeg/container.py index e215209..b5f704a 100755 --- a/spatialmedia/mpeg/container.py +++ b/spatialmedia/mpeg/container.py @@ -33,12 +33,12 @@ def load(fh, position, end): fh.seek(position) header_size = 8 size = struct.unpack(">I", fh.read(4))[0] - name = fh.read(4).decode() + name = fh.read(4) is_box = name not in constants.CONTAINERS_LIST # Handle the mp4a decompressor setting (wave -> mp4a). if name == constants.TAG_MP4A and size == 12: - is_box = True + is_box = True if is_box: if name == constants.TAG_SA3D: return sa3d.load(fh, position, end) @@ -196,11 +196,11 @@ def save(self, in_fh, out_fh, delta): """ if self.header_size == 16: out_fh.write(struct.pack(">I", 1)) - out_fh.write(self.name.encode()) + out_fh.write(self.name) out_fh.write(struct.pack(">Q", self.size())) elif self.header_size == 8: out_fh.write(struct.pack(">I", self.size())) - out_fh.write(self.name.encode()) + out_fh.write(self.name) if self.padding > 0: in_fh.seek(self.content_start()) diff --git a/spatialmedia/mpeg/mpeg4_container.py b/spatialmedia/mpeg/mpeg4_container.py index 157654f..d429137 100755 --- a/spatialmedia/mpeg/mpeg4_container.py +++ b/spatialmedia/mpeg/mpeg4_container.py @@ -52,14 +52,14 @@ def load(fh): loaded_mpeg4.contents = contents for element in loaded_mpeg4.contents: - if (element.name == "moov"): + if (element.name == constants.TAG_MOOV): loaded_mpeg4.moov_box = element - if (element.name == "free"): + if (element.name == constants.TAG_FREE): loaded_mpeg4.free_box = element - if (element.name == "mdat" + if (element.name == constants.TAG_MDAT and not loaded_mpeg4.first_mdat_box): loaded_mpeg4.first_mdat_box = element - if (element.name == "ftyp"): + if (element.name == constants.TAG_FTYP): loaded_mpeg4.ftyp_box = element if not loaded_mpeg4.moov_box: diff --git a/spatialmedia/mpeg/sa3d.py b/spatialmedia/mpeg/sa3d.py index 1bc34c7..72c67ec 100644 --- a/spatialmedia/mpeg/sa3d.py +++ b/spatialmedia/mpeg/sa3d.py @@ -44,7 +44,7 @@ def load(fh, position=None, end=None): new_box = SA3DBox() new_box.position = position size = struct.unpack(">I", fh.read(4))[0] - name = fh.read(4).decode() + name = fh.read(4) if (name != constants.TAG_SA3D): print("Error: box is not an SA3D box.") @@ -57,6 +57,8 @@ def load(fh, position=None, end=None): new_box.content_size = size - new_box.header_size new_box.version = struct.unpack(">B", fh.read(1))[0] new_box.ambisonic_type = struct.unpack(">B", fh.read(1))[0] + new_box.head_locked_stereo = (new_box.ambisonic_type & int('10000000', 2) != 0) + new_box.ambisonic_type = new_box.ambisonic_type & int('01111111', 2) new_box.ambisonic_order = struct.unpack(">I", fh.read(4))[0] new_box.ambisonic_channel_ordering = struct.unpack(">B", fh.read(1))[0] new_box.ambisonic_normalization = struct.unpack(">B", fh.read(1))[0] @@ -78,6 +80,7 @@ def __init__(self): self.header_size = 8 self.version = 0 self.ambisonic_type = 0 + self.head_locked_stereo = False self.ambisonic_order = 0 self.ambisonic_channel_ordering = 0 self.ambisonic_normalization = 0 @@ -93,6 +96,7 @@ def create(num_channels, audio_metadata): new_box.content_size += 1 # uint8 new_box.ambisonic_type = SA3DBox.ambisonic_types[ audio_metadata["ambisonic_type"]] + new_box.head_locked_stereo = audio_metadata["head_locked_stereo"] new_box.content_size += 1 # uint8 new_box.ambisonic_order = audio_metadata["ambisonic_order"] new_box.content_size += 4 # uint32 @@ -112,16 +116,16 @@ def create(num_channels, audio_metadata): return new_box def ambisonic_type_name(self): - return (key for key,value in SA3DBox.ambisonic_types.items() - if value==self.ambisonic_type).next() + return next((key for key,value in SA3DBox.ambisonic_types.items() + if value==self.ambisonic_type)) def ambisonic_channel_ordering_name(self): - return (key for key,value in SA3DBox.ambisonic_orderings.items() - if value==self.ambisonic_channel_ordering).next() + return next((key for key,value in SA3DBox.ambisonic_orderings.items() + if value==self.ambisonic_channel_ordering)) def ambisonic_normalization_name(self): - return (key for key,value in SA3DBox.ambisonic_normalizations.items() - if value==self.ambisonic_normalization).next() + return next((key for key,value in SA3DBox.ambisonic_normalizations.items() + if value==self.ambisonic_normalization)) def print_box(self, console): """ Prints the contents of this spatial audio (SA3D) box to the @@ -131,6 +135,7 @@ def print_box(self, console): channel_ordering = self.ambisonic_channel_ordering_name() ambisonic_normalization = self.ambisonic_normalization_name() console("\t\tAmbisonic Type: %s" % ambisonic_type) + console("\t\tContains Head-Locked Stereo: %r" % self.head_locked_stereo) console("\t\tAmbisonic Order: %d" % self.ambisonic_order) console("\t\tAmbisonic Channel Ordering: %s" % channel_ordering) console("\t\tAmbisonic Normalization: %s" % ambisonic_normalization) @@ -157,8 +162,11 @@ def save(self, in_fh, out_fh, delta): out_fh.write(struct.pack(">I", self.size())) out_fh.write(self.name) + ambisonic_type = ( + self.ambisonic_type | int('10000000', 2) if + self.head_locked_stereo else self.ambisonic_type & int('01111111', 2)) out_fh.write(struct.pack(">B", self.version)) - out_fh.write(struct.pack(">B", self.ambisonic_type)) + out_fh.write(struct.pack(">B", ambisonic_type)) out_fh.write(struct.pack(">I", self.ambisonic_order)) out_fh.write(struct.pack(">B", self.ambisonic_channel_ordering)) out_fh.write(struct.pack(">B", self.ambisonic_normalization))