Skip to content

Commit

Permalink
Add `option.mkvUseIndex` to use index to skip Matroska cluster elements
Browse files Browse the repository at this point in the history
  • Loading branch information
Borewit committed Aug 15, 2024
1 parent 1fe4049 commit 1a713b7
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 9 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,11 @@ import { parseFile, selectCover } from 'music-metadata';
- `observer: (update: MetadataEvent) => void;`: Will be called after each change to `common` (generic) tag, or `format` properties.
- `skipCovers`: default: `false`, if set to `true`, it will not return embedded cover-art (images).
- `skipPostHeaders? boolean` default: `false`, if set to `true`, it will not search the entire track for additional headers. Only recommended for use in combination with streams.
- `includeChapters` default: `false`, if set to `true`, it will parse chapters (currently only MP4 files). _experimental functionality_
- `mkvUseIndex` default: `false`, if set to `true`, in Matroska-based files, use the _SeekHead_ element index to skip _segment/cluster_ elements.
_experimental functionality_
Can have a significant performance impact if enabled.
A possible side effect is that certain metadata may be skipped, depending on the index.
If there is no _SeekHead_ element present in the Matroska file, this flag has no effect.
Although in most cases duration is included, in some cases it requires `music-metadata` parsing the entire file.
To enforce parsing the entire file if needed you should set `duration` to `true`.
Expand Down
28 changes: 26 additions & 2 deletions lib/matroska/MatroskaParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { ITokenizer } from 'strtok3';
import type { INativeMetadataCollector } from '../common/MetadataCollector.js';
import { BasicParser } from '../common/BasicParser.js';
import { matroskaDtd } from './MatroskaDtd.js';
import { type IAttachments, type ISegmentInformation, type ITags, type ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js';
import { type IAttachments, type ISeekHead, type ISegmentInformation, type ITags, type ITrackElement, type ITrackEntry, TargetType, TrackType } from './types.js';

import type { AnyTagValue, IOptions, ITrackInfo } from '../type.js';
import type { ITokenParser } from '../ParserFactory.js';
Expand All @@ -21,6 +21,14 @@ const debug = initDebug('music-metadata:parser:matroska');
*/
export class MatroskaParser extends BasicParser {

private seekHead: ISeekHead | undefined;
private seekHeadOffset = 0;
/**
* Use index to skip multiple segment/cluster elements at once.
* Significant performance impact
*/
private flagUseIndexToSkipClusters = false;

/**
* Initialize parser with output (metadata), input (tokenizer) & parsing options (options).
* @param {INativeMetadataCollector} metadata Output
Expand All @@ -29,6 +37,7 @@ export class MatroskaParser extends BasicParser {
*/
public init(metadata: INativeMetadataCollector, tokenizer: ITokenizer, options: IOptions): ITokenParser {
  // Hand metadata, tokenizer and options to the base parser first.
  super.init(metadata, tokenizer, options);

  // Index-based cluster skipping is opt-in: it stays off unless `mkvUseIndex` is provided.
  const useIndex = options.mkvUseIndex ?? false;
  this.flagUseIndexToSkipClusters = useIndex;

  // Return this parser instance to the caller.
  return this;
}

Expand All @@ -46,18 +55,33 @@ export class MatroskaParser extends BasicParser {
debug(`Skip element: name=${element.name}, id=0x${element.id.toString(16)}`);
return ParseAction.IgnoreElement;
case 0x1f43b675: // cluster
if (this.flagUseIndexToSkipClusters && this.seekHead) {
const index = this.seekHead.seek.find(index => index.position + this.seekHeadOffset > this.tokenizer.position);
if (index) {
// Go to next index position
const ignoreSize = index.position + this.seekHeadOffset - this.tokenizer.position;
debug(`Use index to go to next position, ignoring ${ignoreSize} bytes`);
this.tokenizer.ignore(ignoreSize);
return ParseAction.SkipElement;
}
}
return ParseAction.IgnoreElement;
default:
return ParseAction.ReadNext;
}
},
elementValue: async (element, value) => {
elementValue: async (element, value, offset) => {
debug(`Received: name=${element.name}, value=${value}`);
switch (element.id) {
case 0x4282: // docType
this.metadata.setFormat('container', `EBML/${value}`);
break;

case 0x114d9b74:
this.seekHead = value as unknown as ISeekHead
this.seekHeadOffset = offset;
break;

case 0x1549a966: { // Info (Segment Information)
const info = value as ISegmentInformation;
const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000;
Expand Down
10 changes: 10 additions & 0 deletions lib/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,16 @@ export interface IOptions {
* Set observer for async callbacks to common or format.
*/
observer?: Observer;

/**
* In Matroska-based files, use the _SeekHead_ element index to skip _segment/cluster_ elements.
* Disabled by default.
* Can have a significant performance impact if enabled.
* A possible side effect is that certain metadata may be skipped, depending on the index.
* If there is no _SeekHead_ element present in the Matroska file, this flag has no effect.
* Ref: https://www.matroska.org/technical/diagram.html
*/
mkvUseIndex?: boolean;
}

export interface IApeHeader extends IOptions {
Expand Down
34 changes: 28 additions & 6 deletions test/test-file-matroska.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ describe('Matroska formats', () => {
assert.strictEqual(format.numberOfChannels, 2, 'format.numberOfChannels');
});

it('parse: "02 - Poxfil - Solid Ground (5 sec).mka"', async () => {
async function parsePoxfile(options?: mm.IOptions) {

const mkaPath = path.join(matroskaSamplePath, '02 - Poxfil - Solid Ground (5 sec).mka');

const {format, common} = await mm.parseFile(mkaPath, {duration: false});
const {format, common} = await mm.parseFile(mkaPath, options);

// format chunk information
assert.strictEqual(format.container, 'EBML/matroska', 'format.container');
Expand All @@ -54,6 +54,14 @@ describe('Matroska formats', () => {
assert.strictEqual(format.numberOfChannels, 2, 'format.numberOfChannels');

verifyTrackSolidGround(common);
}

// Runs the shared Poxfil checks without passing any parser options.
it('parse: "02 - Poxfil - Solid Ground (5 sec).mka"', () => parsePoxfile());

// Runs the shared Poxfil checks again with the `mkvUseIndex` option set to true.
it('parse: "02 - Poxfil - Solid Ground (5 sec).mka" with `mkvUseIndex` flag', () => parsePoxfile({mkvUseIndex: true}));
});

Expand Down Expand Up @@ -116,7 +124,7 @@ describe('Matroska formats', () => {
assert.strictEqual(common.encodersettings, '--bitrate 96 --vbr', 'common.encodersettings');
});

it('should ignore trailing null characters', async () => {
it('shoud ignore trailing null characters', async () => {
const webmPath = path.join(matroskaSamplePath, 'fixture-null.webm');
const {format} = await mm.parseFile(webmPath, {duration: false});
assert.strictEqual(format.container, 'EBML/webm', 'format.container');
Expand All @@ -127,11 +135,10 @@ describe('Matroska formats', () => {
// https://github.com/Borewit/music-metadata/issues/384
describe('Multiple audio tracks', () => {

it('parse: "matroska-test-w1-test5-short.mkv"', async () => {

async function parse(options?: mm.IOptions) {
const mkvPath = path.join(matroskaSamplePath, 'matroska-test-w1-test5-short.mkv');

const {format, common} = await mm.parseFile(mkvPath);
const {format, common} = await mm.parseFile(mkvPath, options);

assert.deepEqual(format.container, 'EBML/matroska', 'format.container');
assert.deepEqual(format.tagTypes, [ 'matroska' ], 'format.tagTypes');
Expand All @@ -143,6 +150,14 @@ describe('Matroska formats', () => {

assert.deepEqual(common.title, 'Elephant Dreams', 'common.title');
assert.deepEqual(common.album, 'Matroska Test Files - Wave 1', 'common.album');
}

// Runs the shared multi-track checks without passing any parser options.
it('parse: "matroska-test-w1-test5-short.mkv"', () => parse());

// Runs the shared multi-track checks with the `mkvUseIndex` option set to true.
// Title fixed: the file name's closing quote and the word "with" were missing,
// which made the reported test name inconsistent with the sibling Poxfil test.
it('parse: "matroska-test-w1-test5-short.mkv" with `mkvUseIndex` flag', () => {
  return parse({mkvUseIndex: true});
});

});
Expand All @@ -160,6 +175,13 @@ describe('Matroska formats', () => {
assert.strictEqual(format.numberOfChannels, 1, 'format.numberOfChannels');
});

it('Parse stream with `mkvUseIndex` flag', async () => {
  // Parse the sample with the `mkvUseIndex` option set to true.
  const metadata = await mm.parseFile(mkvPath, {mkvUseIndex: true});
  const parsedFormat = metadata.format;

  // Container, codec and channel count are checked in the same order as before.
  assert.strictEqual(parsedFormat.container, 'EBML/webm', 'format.container');
  assert.strictEqual(parsedFormat.codec, 'OPUS', 'format.codec');
  assert.strictEqual(parsedFormat.numberOfChannels, 1, 'format.numberOfChannels');
});

});

describe('Handle corrupt Matroska file', () => {
Expand Down

0 comments on commit 1a713b7

Please sign in to comment.