diff --git a/demo/app.ts b/demo/app.ts index 9ad6110..23b9f60 100644 --- a/demo/app.ts +++ b/demo/app.ts @@ -1,5 +1,9 @@ import "media-chrome"; -import { BabyMediaSource, BabyVideoElement } from "../src/index"; +import { + BabyMediaSource, + BabySourceBuffer, + BabyVideoElement +} from "../src/index"; import { TimeRanges } from "../src/time-ranges"; import { waitForEvent } from "../src/util"; @@ -14,6 +18,7 @@ video.addEventListener("pause", logEvent); video.addEventListener("playing", logEvent); // video.addEventListener("timeupdate", logEvent); video.addEventListener("durationchange", logEvent); +video.addEventListener("ratechange", logEvent); video.addEventListener("seeking", logEvent); video.addEventListener("seeked", logEvent); video.addEventListener("progress", logEvent); @@ -28,24 +33,51 @@ if (mediaSource.readyState !== "open") { await waitForEvent(mediaSource, "sourceopen"); } mediaSource.duration = streamDuration; -const sourceBuffer = mediaSource.addSourceBuffer( +const videoSourceBuffer = mediaSource.addSourceBuffer( 'video/mp4; codecs="avc1.640028"' ); -const segmentURLs = [ +const audioSourceBuffer = mediaSource.addSourceBuffer( + 'audio/mp4; codecs="mp4a.40.5"' +); +const videoSegmentURLs = [ "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_640x360_1000k/bbb_30fps_640x360_1000k_0.m4v", "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_640x360_1000k/bbb_30fps_640x360_1000k_1.m4v", "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_640x360_1000k/bbb_30fps_640x360_1000k_2.m4v", "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_0.m4v", "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_2.m4v", "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_3.m4v", - "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_4.m4v", + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_4.m4v" +]; +const audioSegmentURLs = [ + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_0.m4a", + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_1.m4a", + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_2.m4a", + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_3.m4a", + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_4.m4a" ]; -for (const segmentURL of segmentURLs) { - const segmentData = await (await fetch(segmentURL)).arrayBuffer(); - sourceBuffer.appendBuffer(segmentData); - await waitForEvent(sourceBuffer, "updateend"); + +AbortSignal.prototype.throwIfAborted ??= function throwIfAborted( + this: AbortSignal +) { + if (this.aborted) throw this.reason; +}; + +async function appendSegments( + sourceBuffer: BabySourceBuffer, + segmentURLs: string[] +) { + for (const segmentURL of segmentURLs) { + const segmentData = await (await fetch(segmentURL)).arrayBuffer(); + sourceBuffer.appendBuffer(segmentData); + await waitForEvent(sourceBuffer, "updateend"); + } } +await Promise.all([ + appendSegments(videoSourceBuffer, videoSegmentURLs), + appendSegments(audioSourceBuffer, audioSegmentURLs) +]); + interface Segment { url: string; startTime: number; @@ -54,10 +86,12 @@ interface Segment { isLast: boolean; } -const segmentDuration = 4; -const lastSegmentIndex = Math.ceil(streamDuration / segmentDuration) - 1; - -function getSegmentForTime(time: number): Segment | undefined { +function getSegmentForTime( + 
templateUrl: string, + segmentDuration: number, + time: number +): Segment | undefined { + const lastSegmentIndex = Math.ceil(streamDuration / segmentDuration) - 1; const segmentIndex = Math.max( 0, Math.min(lastSegmentIndex, Math.floor(time / segmentDuration)) @@ -65,30 +99,47 @@ function getSegmentForTime(time: number): Segment | undefined { if (segmentIndex < 0) { return undefined; } - const url = `https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_${ - segmentIndex + 1 - }.m4v`; + const url = templateUrl.replace(/%INDEX%/, `${segmentIndex + 1}`); return { url, startTime: segmentIndex * segmentDuration, endTime: (segmentIndex + 1) * segmentDuration, isFirst: segmentIndex === 0, - isLast: segmentIndex === lastSegmentIndex, + isLast: segmentIndex === lastSegmentIndex }; } +function getVideoSegmentForTime(time: number): Segment | undefined { + return getSegmentForTime( + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_30fps_1920x1080_8000k/bbb_30fps_1920x1080_8000k_%INDEX%.m4v", + 120 / 30, + time + ); +} + +function getAudioSegmentForTime(time: number): Segment | undefined { + return getSegmentForTime( + "https://dash.akamaized.net/akamai/bbb_30fps/bbb_a64k/bbb_a64k_%INDEX%.m4a", + 192512 / 48000, + time + ); +} + const forwardBufferSize = 30; const backwardBufferSize = 10; let pendingBufferLoop: Promise = Promise.resolve(); -async function bufferLoop(signal: AbortSignal) { - await pendingBufferLoop; +async function trackBufferLoop( + sourceBuffer: BabySourceBuffer, + segmentForTime: (time: number) => Segment | undefined, + signal: AbortSignal +) { while (true) { - if (signal.aborted) throw signal.reason; + signal.throwIfAborted(); // Check buffer health while (true) { - const currentRange = video.buffered.find(video.currentTime); + const currentRange = sourceBuffer.buffered.find(video.currentTime); const forward = video.playbackRate >= 0; if (!currentRange) { // No buffer, need new segment immediately @@ -109,20 +160,20 @@ async function bufferLoop(signal: AbortSignal) { await waitForEvent(video, ["timeupdate", "ratechange"], signal); } // Find next segment - const currentRange = video.buffered.find(video.currentTime); + const currentRange = sourceBuffer.buffered.find(video.currentTime); const forward = video.playbackRate >= 0; const nextTime = currentRange ? forward ? currentRange[1] : currentRange[0] - 0.001 : video.currentTime; - const nextSegment = getSegmentForTime(nextTime)!; + const nextSegment = segmentForTime(nextTime)!; // Remove old buffer before/after current time const retainStart = video.currentTime - (forward ? backwardBufferSize : forwardBufferSize); const retainEnd = video.currentTime + (forward ? 
forwardBufferSize : backwardBufferSize); - const oldBuffered = video.buffered.subtract( + const oldBuffered = sourceBuffer.buffered.subtract( new TimeRanges([[retainStart, retainEnd]]) ); for (let i = 0; i < oldBuffered.length; i++) { @@ -135,19 +186,33 @@ async function bufferLoop(signal: AbortSignal) { ).arrayBuffer(); sourceBuffer.appendBuffer(segmentData); await waitForEvent(sourceBuffer, "updateend"); + // Check if we're done buffering if (forward) { if (nextSegment.isLast) { - mediaSource.endOfStream(); - break; // Stop buffering until next seek + return; // Stop buffering until next seek } } else { if (nextSegment.isFirst) { - break; // Stop buffering until next seek + return; // Stop buffering until next seek } } } } +async function bufferLoop(signal: AbortSignal) { + await pendingBufferLoop; + await Promise.allSettled([ + trackBufferLoop(videoSourceBuffer, getVideoSegmentForTime, signal), + trackBufferLoop(audioSourceBuffer, getAudioSegmentForTime, signal) + ]); + signal.throwIfAborted(); + // All tracks are done buffering until the last segment + const forward = video.playbackRate >= 0; + if (forward) { + mediaSource.endOfStream(); + } +} + let bufferAbortController: AbortController = new AbortController(); function restartBuffering() { diff --git a/demo/vendor.d.ts b/demo/vendor.d.ts deleted file mode 100644 index 3b15dd7..0000000 --- a/demo/vendor.d.ts +++ /dev/null @@ -1 +0,0 @@ -declare module "media-chrome"; diff --git a/index.html b/index.html index b9fdd17..d58b92c 100644 --- a/index.html +++ b/index.html @@ -1,4 +1,4 @@ - + @@ -14,16 +14,42 @@ - + + + + - -

-    Source code on GitHub
+    Source code on GitHub

+ + diff --git a/src/media-source.ts b/src/media-source.ts index 8981fad..5982fcb 100644 --- a/src/media-source.ts +++ b/src/media-source.ts @@ -1,4 +1,8 @@ -import { BabySourceBuffer, getVideoTrackBuffer } from "./source-buffer"; +import { + BabySourceBuffer, + getAudioTrackBuffer, + getVideoTrackBuffer +} from "./source-buffer"; import { BabyVideoElement, MediaReadyState, @@ -7,7 +11,7 @@ import { updateReadyState } from "./video-element"; import { queueTask } from "./util"; -import { VideoTrackBuffer } from "./track-buffer"; +import { AudioTrackBuffer, VideoTrackBuffer } from "./track-buffer"; import { setEndTimeOnLastRange, TimeRanges } from "./time-ranges"; export type MediaSourceReadyState = "closed" | "ended" | "open"; @@ -32,6 +36,9 @@ export let getBuffered: (mediaSource: BabyMediaSource) => TimeRanges; export let getActiveVideoTrackBuffer: ( mediaSource: BabyMediaSource ) => VideoTrackBuffer | undefined; +export let getActiveAudioTrackBuffer: ( + mediaSource: BabyMediaSource +) => AudioTrackBuffer | undefined; export let openIfEnded: (mediaSource: BabyMediaSource) => void; export let checkBuffer: (mediaSource: BabyMediaSource) => void; @@ -222,6 +229,16 @@ export class BabyMediaSource extends EventTarget { return undefined; } + #getActiveAudioTrackBuffer(): AudioTrackBuffer | undefined { + for (const sourceBuffer of this.#sourceBuffers) { + const audioTrackBuffer = getAudioTrackBuffer(sourceBuffer); + if (audioTrackBuffer) { + return audioTrackBuffer; + } + } + return undefined; + } + #getBuffered(): TimeRanges { // https://w3c.github.io/media-source/#htmlmediaelement-extensions-buffered // 2.1. Let recent intersection ranges equal an empty TimeRanges object. @@ -262,6 +279,8 @@ export class BabyMediaSource extends EventTarget { } const buffered = this.#getBuffered(); const currentTime = mediaElement.currentTime; + const duration = this.#duration; + const playbackRate = mediaElement.playbackRate; const currentRange = buffered.find(currentTime); // If HTMLMediaElement.buffered does not contain a TimeRanges for the current playback position: if (currentRange === undefined) { @@ -281,7 +300,7 @@ export class BabyMediaSource extends EventTarget { } // If HTMLMediaElement.buffered contains a TimeRanges that includes the current playback position // and some time beyond the current playback position, then run the following steps: - if (buffered.containsRange(currentTime, currentTime + 0.1)) { + if (hasSomeBuffer(buffered, currentTime, duration, playbackRate)) { // Set the HTMLMediaElement.readyState attribute to HAVE_FUTURE_DATA. // Playback may resume at this point if it was previously suspended by a transition to HAVE_CURRENT_DATA. updateReadyState(mediaElement, MediaReadyState.HAVE_FUTURE_DATA); @@ -312,6 +331,8 @@ export class BabyMediaSource extends EventTarget { openIfEnded = (mediaSource) => mediaSource.#openIfEnded(); getActiveVideoTrackBuffer = (mediaSource) => mediaSource.#getActiveVideoTrackBuffer(); + getActiveAudioTrackBuffer = (mediaSource) => + mediaSource.#getActiveAudioTrackBuffer(); checkBuffer = (mediaSource) => mediaSource.#checkBuffer(); } } @@ -319,3 +340,19 @@ export class BabyMediaSource extends EventTarget { function getHighestEndTime(buffered: TimeRanges): number { return buffered.length > 0 ? 
buffered.end(buffered.length - 1) : 0; } + +export function hasSomeBuffer( + buffered: TimeRanges, + currentTime: number, + duration: number, + playbackRate: number +): boolean { + if (playbackRate >= 0) { + return buffered.containsRange( + currentTime, + Math.min(currentTime + 0.1, duration) + ); + } else { + return buffered.containsRange(Math.max(0, currentTime - 0.1), currentTime); + } +} diff --git a/src/source-buffer.ts b/src/source-buffer.ts index 50c40f0..2cbabfb 100644 --- a/src/source-buffer.ts +++ b/src/source-buffer.ts @@ -8,6 +8,7 @@ import { DataStream, Info, ISOFile, + Mp4aBox, MP4ArrayBuffer, MP4BoxStream, Sample, @@ -20,6 +21,7 @@ import { durationChange, endOfStream, getMediaElement, + hasSomeBuffer, openIfEnded } from "./media-source"; import { @@ -37,6 +39,9 @@ import { setEndTimeOnLastRange, TimeRanges } from "./time-ranges"; export let getVideoTrackBuffer: ( sourceBuffer: BabySourceBuffer ) => VideoTrackBuffer | undefined; +export let getAudioTrackBuffer: ( + sourceBuffer: BabySourceBuffer +) => AudioTrackBuffer | undefined; export class BabySourceBuffer extends EventTarget { readonly #parent: BabyMediaSource; @@ -355,7 +360,9 @@ export class BabySourceBuffer extends EventTarget { return; } // * The codecs for each track are supported by the user agent. - const audioTrackConfigs = info.audioTracks.map(buildAudioConfig); + const audioTrackConfigs = info.audioTracks.map((trackInfo) => + buildAudioConfig(trackInfo, this.#isoFile!.getTrackById(trackInfo.id)) + ); const videoTrackConfigs = info.videoTracks.map((trackInfo) => buildVideoConfig(trackInfo, this.#isoFile!.getTrackById(trackInfo.id)) ); @@ -399,7 +406,9 @@ export class BabySourceBuffer extends EventTarget { // 5.1. If the initialization segment contains tracks with codecs // the user agent does not support, then run the append error // algorithm and abort these steps. - const audioTrackConfigs = info.audioTracks.map(buildAudioConfig); + const audioTrackConfigs = info.audioTracks.map((trackInfo) => + buildAudioConfig(trackInfo, this.#isoFile!.getTrackById(trackInfo.id)) + ); const videoTrackConfigs = info.videoTracks.map((trackInfo) => buildVideoConfig(trackInfo, this.#isoFile!.getTrackById(trackInfo.id)) ); @@ -506,6 +515,8 @@ export class BabySourceBuffer extends EventTarget { const mediaElement = getMediaElement(this.#parent)!; const buffered = mediaElement.buffered; const currentTime = mediaElement.currentTime; + const duration = this.#parent.duration; + const playbackRate = mediaElement.playbackRate; if ( mediaElement.readyState === MediaReadyState.HAVE_METADATA && buffered.contains(currentTime) @@ -518,7 +529,7 @@ export class BabySourceBuffer extends EventTarget { // attribute to HAVE_FUTURE_DATA. if ( mediaElement.readyState === MediaReadyState.HAVE_CURRENT_DATA && - buffered.containsRange(currentTime, currentTime + 0.1) + hasSomeBuffer(buffered, currentTime, duration, playbackRate) ) { updateReadyState(mediaElement, MediaReadyState.HAVE_FUTURE_DATA); } @@ -601,12 +612,12 @@ export class BabySourceBuffer extends EventTarget { if (trackBuffer.type === "video") { // 1. Let remove window timestamp equal the overlapped frame presentation timestamp // plus 1 microsecond. - const removeWindowTimestamp = overlappedFrame.timestamp! + 1; + const removeWindowTimestamp = overlappedFrame.timestamp + 1; // 2. If the presentation timestamp is less than the remove window timestamp, // then remove overlapped frame from track buffer. 
if (1e6 * pts < removeWindowTimestamp) { trackBuffer.removeSamples( - overlappedFrame.timestamp!, + overlappedFrame.timestamp, removeWindowTimestamp ); } @@ -738,8 +749,16 @@ export class BabySourceBuffer extends EventTarget { ); } + #getAudioTrackBuffer(): AudioTrackBuffer | undefined { + return this.#trackBuffers.find( + (trackBuffer): trackBuffer is AudioTrackBuffer => + trackBuffer instanceof AudioTrackBuffer + ); + } + static { getVideoTrackBuffer = (sourceBuffer) => sourceBuffer.#getVideoTrackBuffer(); + getAudioTrackBuffer = (sourceBuffer) => sourceBuffer.#getAudioTrackBuffer(); } } @@ -747,11 +766,15 @@ function toMP4ArrayBuffer(ab: ArrayBuffer, fileStart: number): MP4ArrayBuffer { return Object.assign(ab, { fileStart }); } -function buildAudioConfig(info: AudioTrackInfo): AudioDecoderConfig { +function buildAudioConfig( + info: AudioTrackInfo, + trak: TrakBox +): AudioDecoderConfig { return { codec: info.codec, numberOfChannels: info.audio.channel_count, - sampleRate: info.audio.sample_rate + sampleRate: info.audio.sample_rate, + description: getAudioSpecificConfig(trak) }; } @@ -771,6 +794,10 @@ function isAvcEntry(entry: Box): entry is AvcBox { return (entry as AvcBox).avcC !== undefined; } +function isMp4aEntry(entry: Box): entry is Mp4aBox { + return entry.type === "mp4a"; +} + function createAvcDecoderConfigurationRecord( trak: TrakBox ): Uint8Array | undefined { @@ -784,6 +811,21 @@ function createAvcDecoderConfigurationRecord( return new Uint8Array(stream.buffer, 8); // remove the box header } +function getAudioSpecificConfig(trak: TrakBox): Uint8Array | undefined { + const descriptor = + trak.mdia.minf.stbl.stsd.entries.find(isMp4aEntry)?.esds.esd.descs[0]; + if (!descriptor) { + return undefined; + } + // 0x04 is the DecoderConfigDescrTag. Assuming MP4Box always puts this at position 0. 
+ console.assert(descriptor.tag == 0x04); + // 0x40 is the Audio OTI, per table 5 of ISO 14496-1 + console.assert(descriptor.oti == 0x40); + // 0x05 is the DecSpecificInfoTag + console.assert(descriptor.descs[0].tag == 0x05); + return descriptor.descs[0].data; +} + function hasMatchingTrackIds( newTracks: readonly TrackInfo[], oldTracks: readonly TrackInfo[] diff --git a/src/track-buffer.ts b/src/track-buffer.ts index 92c56bb..06d13f7 100644 --- a/src/track-buffer.ts +++ b/src/track-buffer.ts @@ -1,6 +1,6 @@ import { TimeRanges } from "./time-ranges"; import { Sample } from "mp4box"; -import { Direction, insertSorted } from "./util"; +import { arrayRemoveAt, Direction, insertSorted } from "./util"; const BUFFERED_TOLERANCE: number = 1 / 60; @@ -82,6 +82,8 @@ export abstract class TrackBuffer { abstract findFrameForTime(time: number): T | undefined; + abstract hasFrame(frame: T): boolean; + abstract getDecodeDependenciesForFrame(frame: T): DecodeQueue; abstract getNextFrames( @@ -130,6 +132,10 @@ export class AudioTrackBuffer extends TrackBuffer { ); } + hasFrame(frame: EncodedAudioChunk): boolean { + return this.#frames.includes(frame); + } + getDecodeDependenciesForFrame(frame: EncodedAudioChunk): AudioDecodeQueue { return { frames: [frame], @@ -140,21 +146,38 @@ export class AudioTrackBuffer extends TrackBuffer { getNextFrames( frame: EncodedAudioChunk, maxAmount: number, - _direction: Direction - ): DecodeQueue | undefined { + direction: Direction + ): AudioDecodeQueue | undefined { const frameIndex = this.#frames.indexOf(frame); - if (frameIndex < 0 || frameIndex === this.#frames.length - 1) { + if (frameIndex < 0) { return undefined; } - const nextIndex = frameIndex + 1; - return { - frames: this.#frames.slice(nextIndex, nextIndex + maxAmount), - codecConfig: this.codecConfig - }; + if (direction === Direction.FORWARD) { + const nextIndex = frameIndex + 1; + if (nextIndex >= this.#frames.length) { + return undefined; + } + return { + frames: this.#frames.slice(nextIndex, nextIndex + maxAmount), + codecConfig: this.codecConfig + }; + } else { + const nextIndex = frameIndex - 1; + if (nextIndex < 0) { + return undefined; + } + return { + frames: this.#frames.slice( + Math.max(0, nextIndex - maxAmount), + nextIndex + ), + codecConfig: this.codecConfig + }; + } } getRandomAccessPointAtOrAfter(timeInMicros: number): number | undefined { - return this.#frames.find((frame) => frame.timestamp! >= timeInMicros) + return this.#frames.find((frame) => frame.timestamp >= timeInMicros) ?.timestamp; } @@ -163,7 +186,7 @@ export class AudioTrackBuffer extends TrackBuffer { for (let i = this.#frames.length - 1; i >= 0; i--) { const frame = this.#frames[i]; if (frame.timestamp >= startInMicros && frame.timestamp < endInMicros) { - this.#frames.splice(i, 1); + arrayRemoveAt(this.#frames, i); didRemove = true; } } @@ -175,8 +198,8 @@ export class AudioTrackBuffer extends TrackBuffer { #updateTrackBufferRanges(): void { this.trackBufferRanges = new TimeRanges( this.#frames.map((frame) => [ - frame.timestamp! / 1e6, - (frame.timestamp! + frame.duration!) / 1e6 + frame.timestamp / 1e6, + (frame.timestamp + frame.duration!) 
/ 1e6 ]) ).mergeOverlaps(BUFFERED_TOLERANCE); } @@ -238,6 +261,10 @@ export class VideoTrackBuffer extends TrackBuffer { this.#currentGop = undefined; } + hasFrame(frame: EncodedAudioChunk): boolean { + return this.#gops.some((gop) => gop.frames.includes(frame)); + } + findFrameForTime(time: number): EncodedVideoChunk | undefined { const timeInMicros = time * 1e6; const containingGop = this.#gops.find((gop) => { @@ -271,7 +298,7 @@ export class VideoTrackBuffer extends TrackBuffer { frame: EncodedVideoChunk, maxAmount: number, direction: Direction - ): DecodeQueue | undefined { + ): VideoDecodeQueue | undefined { let gopIndex = this.#gops.findIndex((gop) => { return gop.frames.includes(frame); })!; @@ -338,12 +365,12 @@ export class VideoTrackBuffer extends TrackBuffer { // Keep entire GOP. } else if (removeFrom === 0) { // Remove entire GOP. - this.#gops.splice(i, 1); + arrayRemoveAt(this.#gops, i); didRemove = true; } else { // Remove some frames. const lastFrame = gop.frames[removeFrom - 1]; - gop.end = lastFrame.timestamp! + lastFrame.duration!; + gop.end = lastFrame.timestamp + lastFrame.duration!; gop.frames.splice(removeFrom); didRemove = true; } diff --git a/src/util.ts b/src/util.ts index 18540d2..3997893 100644 --- a/src/util.ts +++ b/src/util.ts @@ -10,7 +10,7 @@ export function toUint8Array(data: BufferSource): Uint8Array { export function concatUint8Arrays( left: Uint8Array, - right: Uint8Array, + right: Uint8Array ): Uint8Array { const result = new Uint8Array(left.byteLength + right.byteLength); result.set(left, 0); @@ -18,6 +18,20 @@ export function concatUint8Arrays( return result; } +export function arrayRemove(array: T[], element: T): void { + arrayRemoveAt(array, array.indexOf(element)); +} + +export function arrayRemoveAt(array: T[], index: number): void { + if (index < 0) { + return; + } else if (index === 0) { + array.shift(); + } else { + array.splice(index, 1); + } +} + export function queueTask(fn: () => void): void { setTimeout(fn, 0); } @@ -25,7 +39,7 @@ export function queueTask(fn: () => void): void { export function waitForEvent( target: EventTarget, types: string | string[], - signal?: AbortSignal, + signal?: AbortSignal ): Promise { types = Array.isArray(types) ? 
types : [types]; return new Promise((resolve, reject) => { @@ -56,7 +70,7 @@ export function isDefined(x: T | undefined): x is T { export function binarySearch( array: readonly T[], key: number, - keySelector: (v: T) => number, + keySelector: (v: T) => number ): number { // Original from TypeScript by Microsoft // License: Apache 2.0 @@ -81,7 +95,7 @@ export function insertSorted( array: T[], insert: T, keySelector: (v: T) => number, - allowDuplicates?: boolean, + allowDuplicates?: boolean ): void { // Original from TypeScript by Microsoft // License: Apache 2.0 @@ -144,5 +158,5 @@ export class Deferred { export enum Direction { FORWARD = 1, - BACKWARD = -1, + BACKWARD = -1 } diff --git a/src/vendor/mp4box.d.ts b/src/vendor/mp4box.d.ts index 1fb7e0e..71a8278 100644 --- a/src/vendor/mp4box.d.ts +++ b/src/vendor/mp4box.d.ts @@ -188,6 +188,32 @@ declare module "mp4box" { type: "avcC"; } + export interface Mp4aBox extends Box { + type: "mp4a"; + esds: EsdsBox; + } + + export interface EsdsBox extends Box { + type: "esds"; + esd: ES_Descriptor; + } + + export interface ES_Descriptor { + descs: [DecoderConfigDescriptor, ...any[]]; + } + + export interface DecoderConfigDescriptor { + oti: number; + streamType: number; + tag: number; + descs: [DecoderSpecificInfo, ...any[]]; + } + + export interface DecoderSpecificInfo { + tag: number; + data: Uint8Array; + } + export interface ExtractionOptions { nbSamples?: number; } diff --git a/src/video-element.ts b/src/video-element.ts index 3172c2b..4600473 100644 --- a/src/video-element.ts +++ b/src/video-element.ts @@ -4,12 +4,24 @@ import { BabyMediaSource, checkBuffer, detachFromMediaElement, + getActiveAudioTrackBuffer, getActiveVideoTrackBuffer, getBuffered } from "./media-source"; -import { Deferred, Direction, queueTask, waitForEvent } from "./util"; +import { + arrayRemove, + arrayRemoveAt, + Deferred, + Direction, + queueTask, + waitForEvent +} from "./util"; import { TimeRange, TimeRanges } from "./time-ranges"; -import { VideoDecodeQueue } from "./track-buffer"; +import { + AudioDecodeQueue, + EncodedChunk, + VideoDecodeQueue +} from "./track-buffer"; const template = document.createElement("template"); template.innerHTML = ``; @@ -45,16 +57,19 @@ export class BabyVideoElement extends HTMLElement { #currentTime: number = 0; #duration: number = NaN; #ended: boolean = false; + #muted: boolean = false; #paused: boolean = true; #playbackRate: number = 1; #played: TimeRanges = new TimeRanges([]); #readyState: MediaReadyState = MediaReadyState.HAVE_NOTHING; #seeking: boolean = false; #srcObject: BabyMediaSource | undefined; + #volume: number = 1; #pendingPlayPromises: Array> = []; #advanceLoop: number = 0; #lastAdvanceTime: number = 0; + #lastAudioTimestamp: number = 0; #lastPlayedTime: number = NaN; #lastTimeUpdate: number = 0; #lastProgress: number = 0; @@ -68,10 +83,28 @@ export class BabyVideoElement extends HTMLElement { #furthestDecodingVideoFrame: EncodedVideoChunk | undefined = undefined; #decodingVideoFrames: EncodedVideoChunk[] = []; #decodedVideoFrames: VideoFrame[] = []; - #nextDecodedFramePromise: Deferred | undefined = undefined; + #nextDecodedVideoFramePromise: Deferred | undefined = undefined; #lastRenderedFrame: number | undefined = undefined; #nextRenderFrame: number = 0; + readonly #audioDecoder: AudioDecoder; + #lastAudioDecoderConfig: AudioDecoderConfig | undefined = undefined; + #audioDecoderTimestamp: number = 0; + #furthestDecodedAudioFrame: EncodedAudioChunk | undefined = undefined; + #decodingAudioFrames: EncodedAudioChunk[] 
= []; + #nextDecodedAudioFramePromise: Deferred | undefined = undefined; + #originalDecodingAudioFrames: WeakMap = + new WeakMap(); + #decodedAudioFrames: AudioData[] = []; + + #audioContext: AudioContext | undefined; + #lastScheduledAudioFrameTime: number = -1; + #scheduledAudioSourceNodes: Array<{ + node: AudioBufferSourceNode; + timestamp: number; + }> = []; + #volumeGainNode: GainNode | undefined; + constructor() { super(); @@ -95,6 +128,11 @@ export class BabyVideoElement extends HTMLElement { output: (frame) => this.#onVideoFrame(frame), error: (error) => console.error("WTF", error) }); + + this.#audioDecoder = new AudioDecoder({ + output: (data) => this.#onAudioData(data), + error: (error) => console.error("WTF", error) + }); } connectedCallback(): void { @@ -142,6 +180,18 @@ export class BabyVideoElement extends HTMLElement { return this.#ended && this.#playbackRate >= 0; } + get muted(): boolean { + return this.#muted; + } + + set muted(value: boolean) { + if (this.#muted !== value) { + this.#muted = value; + this.dispatchEvent(new Event("volumechange")); + this.#updateVolume(); + } + } + get paused(): boolean { return this.#paused; } @@ -157,11 +207,13 @@ export class BabyVideoElement extends HTMLElement { const currentTime = this.#getCurrentPlaybackPosition(performance.now()); if (Math.sign(value) !== Math.sign(this.#playbackRate)) { this.#resetVideoDecoder(); + this.#resetAudioDecoder(); } this.#playbackRate = value; this.#updateCurrentTime(currentTime); this.#updatePlaying(); this.#updatePlayed(); + this.#updateAudioPlaybackRate(); this.dispatchEvent(new Event("ratechange")); } @@ -200,6 +252,7 @@ export class BabyVideoElement extends HTMLElement { this.#seeking = false; this.#seekAbortController.abort(); this.#lastAdvanceTime = 0; + this.#lastAudioTimestamp = 0; this.#lastProgress = 0; this.#lastPlayedTime = NaN; clearTimeout(this.#nextProgressTimer); @@ -219,6 +272,18 @@ export class BabyVideoElement extends HTMLElement { return this.#canvas.height; } + get volume(): number { + return this.#volume; + } + + set volume(value: number) { + if (this.#volume !== value) { + this.#volume = value; + this.dispatchEvent(new Event("volumechange")); + this.#updateVolume(); + } + } + load(): void { // TODO } @@ -341,13 +406,16 @@ export class BabyVideoElement extends HTMLElement { #updatePlaying(): void { if (this.#isPotentiallyPlaying() && !this.#seeking) { + void this.#audioContext?.resume(); if (this.#advanceLoop === 0) { this.#lastAdvanceTime = performance.now(); + this.#lastAudioTimestamp = this.#audioContext?.currentTime ?? 0; this.#advanceLoop = requestAnimationFrame((now) => { this.#advanceCurrentTime(now); }); } } else if (this.#advanceLoop !== 0) { + void this.#audioContext?.suspend(); cancelAnimationFrame(this.#advanceLoop); this.#advanceLoop = 0; } @@ -358,20 +426,31 @@ export class BabyVideoElement extends HTMLElement { // its current playback position must increase monotonically at the element's playbackRate units // of media time per unit time of the media timeline's clock. if (this.#isPotentiallyPlaying() && !this.#seeking) { + let elapsedTime: number; + // Use audio clock as sync, otherwise use wall-clock time. 
+ if ( + this.#audioContext !== undefined && + this.#audioContext.state === "running" + ) { + elapsedTime = this.#audioContext.currentTime - this.#lastAudioTimestamp; + } else { + elapsedTime = (now - this.#lastAdvanceTime) / 1000; + } const newTime = - this.#currentTime + - (this.#playbackRate * Math.max(0, now - this.#lastAdvanceTime)) / 1000; + this.#currentTime + this.#playbackRate * Math.max(0, elapsedTime); // Do not advance outside the current buffered range. - const currentRange = this.buffered.find(this.#currentTime)!; - return Math.min(Math.max(currentRange[0], newTime), currentRange[1]); - } else { - return this.#currentTime; + const currentRange = this.buffered.find(this.#currentTime); + if (currentRange !== undefined) { + return Math.min(Math.max(currentRange[0], newTime), currentRange[1]); + } } + return this.#currentTime; } #updateCurrentTime(currentTime: number) { this.#currentTime = currentTime; this.#decodeVideoFrames(); + this.#decodeAudio(); if (this.#srcObject) { checkBuffer(this.#srcObject); } @@ -432,7 +511,9 @@ export class BabyVideoElement extends HTMLElement { #advanceCurrentTime(now: number): void { this.#updateCurrentTime(this.#getCurrentPlaybackPosition(now)); this.#renderVideoFrame(); + this.#renderAudio(); this.#lastAdvanceTime = now; + this.#lastAudioTimestamp = this.#audioContext?.currentTime ?? 0; this.#timeMarchesOn(true, now); if (this.#isPotentiallyPlaying() && !this.#seeking) { this.#advanceLoop = requestAnimationFrame((now) => @@ -482,6 +563,7 @@ export class BabyVideoElement extends HTMLElement { queueTask(() => this.dispatchEvent(new Event("seeking"))); // 11. Set the current playback position to the new playback position. this.#resetVideoDecoder(); + this.#resetAudioDecoder(); this.#updateCurrentTime(newPosition); this.#updatePlaying(); // 12. Wait until the user agent has established whether or not the media data for the new playback position @@ -503,8 +585,10 @@ export class BabyVideoElement extends HTMLElement { while (true) { if (this.#readyState <= MediaReadyState.HAVE_CURRENT_DATA) { await waitForEvent(this, "canplay", signal); - } else if (!this.#hasDecodedFrameAtTime(timeInMicros)) { - await this.#waitForNextDecodedFrame(signal); + } else if (!this.#hasDecodedVideoFrameAtTime(timeInMicros)) { + await this.#waitForNextDecodedVideoFrame(signal); + } else if (!this.#hasDecodedAudioFrameAtTime(timeInMicros)) { + await this.#waitForNextDecodedAudioFrame(signal); } else { break; } @@ -522,18 +606,35 @@ export class BabyVideoElement extends HTMLElement { queueTask(() => this.dispatchEvent(new Event("seeked"))); } - #hasDecodedFrameAtTime(timeInMicros: number): boolean { - return this.#decodedVideoFrames.some( - (frame) => - frame.timestamp! <= timeInMicros && - timeInMicros < frame.timestamp! + frame.duration! + #hasDecodedVideoFrameAtTime(timeInMicros: number): boolean { + return this.#hasDecodedFrameAtTime(this.#decodedVideoFrames, timeInMicros); + } + + #hasDecodedAudioFrameAtTime(timeInMicros: number): boolean { + return this.#hasDecodedFrameAtTime(this.#decodedAudioFrames, timeInMicros); + } + + #hasDecodedFrameAtTime( + frames: ReadonlyArray, + timeInMicros: number + ): boolean { + return frames.some( + (frame: VideoFrame | AudioData) => + frame.timestamp <= timeInMicros && + timeInMicros < frame.timestamp + frame.duration! 
); } - async #waitForNextDecodedFrame(signal: AbortSignal): Promise { - this.#nextDecodedFramePromise = new Deferred(); - this.#nextDecodedFramePromise.follow(signal); - await this.#nextDecodedFramePromise.promise; + async #waitForNextDecodedVideoFrame(signal: AbortSignal): Promise { + this.#nextDecodedVideoFramePromise = new Deferred(); + this.#nextDecodedVideoFramePromise.follow(signal); + await this.#nextDecodedVideoFramePromise.promise; + } + + async #waitForNextDecodedAudioFrame(signal: AbortSignal): Promise { + this.#nextDecodedAudioFramePromise = new Deferred(); + this.#nextDecodedAudioFramePromise.follow(signal); + await this.#nextDecodedAudioFramePromise.promise; } #decodeVideoFrames(): void { @@ -547,6 +648,14 @@ export class BabyVideoElement extends HTMLElement { } const direction = this.#playbackRate < 0 ? Direction.BACKWARD : Direction.FORWARD; + // Check if last decoded frame still exists, i.e. it was not overwritten + // or removed from the SourceBuffer + if ( + this.#furthestDecodingVideoFrame !== undefined && + !videoTrackBuffer.hasFrame(this.#furthestDecodingVideoFrame) + ) { + this.#furthestDecodingVideoFrame = undefined; + } // Decode frames for current time if (this.#furthestDecodingVideoFrame === undefined) { const frameAtTime = videoTrackBuffer.findFrameForTime(this.currentTime); @@ -580,28 +689,30 @@ export class BabyVideoElement extends HTMLElement { decodeQueue: VideoDecodeQueue, direction: Direction ): void { + const { frames, codecConfig } = decodeQueue; if ( this.#videoDecoder.state === "unconfigured" || - this.#lastVideoDecoderConfig !== decodeQueue.codecConfig + this.#lastVideoDecoderConfig !== codecConfig ) { - this.#videoDecoder.configure(decodeQueue.codecConfig); - this.#lastVideoDecoderConfig = decodeQueue.codecConfig; + this.#videoDecoder.configure(codecConfig); + this.#lastVideoDecoderConfig = codecConfig; } - for (const frame of decodeQueue.frames) { + for (const frame of frames) { this.#videoDecoder.decode(frame); this.#decodingVideoFrames.push(frame); } + // The "furthest decoded frame" depends on the rendering order, + // since we must decode the frames in their original order. if (direction == Direction.FORWARD) { - this.#furthestDecodingVideoFrame = - decodeQueue.frames[decodeQueue.frames.length - 1]; + this.#furthestDecodingVideoFrame = frames[frames.length - 1]; } else { - this.#furthestDecodingVideoFrame = decodeQueue.frames[0]; + this.#furthestDecodingVideoFrame = frames[0]; } } async #onVideoFrame(frame: VideoFrame) { - const decodingFrameIndex = this.#decodingVideoFrames.findIndex( - (x) => x.timestamp === frame.timestamp + const decodingFrameIndex = this.#decodingVideoFrames.findIndex((x) => + isFrameTimestampEqual(x, frame) ); if (decodingFrameIndex < 0) { // Drop frames that are no longer in the decode queue. @@ -609,14 +720,12 @@ export class BabyVideoElement extends HTMLElement { return; } const decodingFrame = this.#decodingVideoFrames[decodingFrameIndex]; - this.#decodingVideoFrames.splice(decodingFrameIndex, 1); + arrayRemoveAt(this.#decodingVideoFrames, decodingFrameIndex); // Drop frames that are beyond current time, since we're too late to render them. const currentTimeInMicros = 1e6 * this.#currentTime; const direction = this.#playbackRate < 0 ? 
Direction.BACKWARD : Direction.FORWARD; - if ( - this.#isFrameBeyondTime(decodingFrame, direction, currentTimeInMicros) - ) { + if (isFrameBeyondTime(decodingFrame, direction, currentTimeInMicros)) { frame.close(); // Decode more frames (if we now have more space in the queue) this.#decodeVideoFrames(); @@ -633,9 +742,9 @@ export class BabyVideoElement extends HTMLElement { frame.close(); frame = newFrame; this.#decodedVideoFrames.push(newFrame); - if (this.#nextDecodedFramePromise) { - this.#nextDecodedFramePromise.resolve(); - this.#nextDecodedFramePromise = undefined; + if (this.#nextDecodedVideoFramePromise) { + this.#nextDecodedVideoFramePromise.resolve(); + this.#nextDecodedVideoFramePromise = undefined; } // Schedule render immediately if this is the first decoded frame after a seek if (this.#lastRenderedFrame === undefined) { @@ -645,18 +754,6 @@ export class BabyVideoElement extends HTMLElement { this.#decodeVideoFrames(); } - #isFrameBeyondTime( - frame: EncodedVideoChunk | VideoFrame, - direction: Direction, - timeInMicros: number - ): boolean { - if (direction == Direction.FORWARD) { - return frame.timestamp! + frame.duration! <= timeInMicros; - } else { - return frame.timestamp! >= timeInMicros; - } - } - #scheduleRenderVideoFrame() { if (this.#nextRenderFrame === 0) { this.#nextRenderFrame = requestAnimationFrame(() => @@ -673,33 +770,36 @@ export class BabyVideoElement extends HTMLElement { // Drop all frames that are before current time, since we're too late to render them. for (let i = this.#decodedVideoFrames.length - 1; i >= 0; i--) { const frame = this.#decodedVideoFrames[i]; - if (this.#isFrameBeyondTime(frame, direction, currentTimeInMicros)) { + if (isFrameBeyondTime(frame, direction, currentTimeInMicros)) { frame.close(); - this.#decodedVideoFrames.splice(i, 1); + arrayRemoveAt(this.#decodedVideoFrames, i); } } // Render the frame at current time. let currentFrameIndex = this.#decodedVideoFrames.findIndex((frame) => { return ( - frame.timestamp! <= currentTimeInMicros && - currentTimeInMicros < frame.timestamp! + frame.duration! + frame.timestamp <= currentTimeInMicros && + currentTimeInMicros < frame.timestamp + frame.duration! ); }); - if (currentFrameIndex >= 0) { - const frame = this.#decodedVideoFrames[currentFrameIndex]; - if (this.#lastRenderedFrame !== frame.timestamp!) 
{ - this.#updateSize(frame.displayWidth, frame.displayHeight); - this.#canvasContext.drawImage( - frame, - 0, - 0, - frame.displayWidth, - frame.displayHeight - ); - this.#decodedVideoFrames.splice(currentFrameIndex, 1); - this.#lastRenderedFrame = frame.timestamp!; - frame.close(); - } + if (currentFrameIndex < 0) { + // Decode more frames (if we now have more space in the queue) + this.#decodeVideoFrames(); + return; + } + const frame = this.#decodedVideoFrames[currentFrameIndex]; + if (this.#lastRenderedFrame !== frame.timestamp) { + this.#updateSize(frame.displayWidth, frame.displayHeight); + this.#canvasContext.drawImage( + frame, + 0, + 0, + frame.displayWidth, + frame.displayHeight + ); + arrayRemoveAt(this.#decodedVideoFrames, currentFrameIndex); + this.#lastRenderedFrame = frame.timestamp; + frame.close(); } // Decode more frames (if we now have more space in the queue) this.#decodeVideoFrames(); @@ -717,6 +817,344 @@ export class BabyVideoElement extends HTMLElement { this.#videoDecoder.reset(); } + #decodeAudio(): void { + const mediaSource = this.#srcObject; + if (!mediaSource) { + return; + } + const audioTrackBuffer = getActiveAudioTrackBuffer(mediaSource); + if (!audioTrackBuffer) { + return; + } + const direction = + this.#playbackRate < 0 ? Direction.BACKWARD : Direction.FORWARD; + // Check if last decoded frame still exists, i.e. it was not overwritten + // or removed from the SourceBuffer + if ( + this.#furthestDecodedAudioFrame !== undefined && + !audioTrackBuffer.hasFrame(this.#furthestDecodedAudioFrame) + ) { + this.#furthestDecodedAudioFrame = undefined; + } + // Decode audio for current time + if (this.#furthestDecodedAudioFrame === undefined) { + const frameAtTime = audioTrackBuffer.findFrameForTime(this.currentTime); + if (frameAtTime === undefined) { + return; + } + this.#processAudioDecodeQueue( + audioTrackBuffer.getDecodeDependenciesForFrame(frameAtTime), + direction + ); + } + // Decode next frames in advance + while ( + this.#decodingAudioFrames.length + this.#decodedAudioFrames.length < + decodeQueueLwm + ) { + const nextQueue = audioTrackBuffer.getNextFrames( + this.#furthestDecodedAudioFrame!, + decodeQueueHwm - + (this.#decodingAudioFrames.length + this.#decodedAudioFrames.length), + direction + ); + if (nextQueue === undefined) { + break; + } + this.#processAudioDecodeQueue(nextQueue, direction); + } + } + + #processAudioDecodeQueue( + decodeQueue: AudioDecodeQueue, + direction: Direction + ): void { + const { frames, codecConfig } = decodeQueue; + if ( + this.#audioDecoder.state === "unconfigured" || + this.#lastAudioDecoderConfig !== codecConfig + ) { + this.#audioDecoder.configure(codecConfig); + this.#lastAudioDecoderConfig = codecConfig; + } + if (direction === Direction.BACKWARD) { + // Audio has no dependencies between frames, so we can decode them + // in the same order as they will be rendered. + frames.reverse(); + } + for (const frame of frames) { + // AudioDecoder does not always preserve EncodedAudioChunk.timestamp + // to the decoded AudioData.timestamp, instead it adds up the sample durations + // since the last decoded chunk. This breaks reverse playback, since we + // intentionally feed the decoder chunks in the "wrong" order. + // Copy to a new chunk with the same *increasing* timestamp, + // and fix the timestamp later on. 
+ const newFrame = cloneEncodedAudioChunk( + frame, + this.#audioDecoderTimestamp + ); + this.#originalDecodingAudioFrames.set(newFrame, frame); + this.#audioDecoder.decode(newFrame); + this.#decodingAudioFrames.push(newFrame); + this.#audioDecoderTimestamp += frame.duration!; + } + // The "furthest audio frame" is always the last one, + // since we decode them in rendering order (see above). + this.#furthestDecodedAudioFrame = frames[frames.length - 1]; + } + + #onAudioData(frame: AudioData): void { + const decodingFrameIndex = this.#decodingAudioFrames.findIndex((x) => + isFrameTimestampEqual(x, frame) + ); + if (decodingFrameIndex < 0) { + // Drop frames that are no longer in the decode queue. + frame.close(); + return; + } + const decodingFrame = this.#decodingAudioFrames[decodingFrameIndex]; + arrayRemoveAt(this.#decodingAudioFrames, decodingFrameIndex); + // Restore original timestamp + const decodedFrame = cloneAudioData( + frame, + this.#originalDecodingAudioFrames.get(decodingFrame)!.timestamp + ); + frame.close(); + // Drop frames that are beyond current time, since we're too late to render them. + const currentTimeInMicros = 1e6 * this.#currentTime; + const direction = + this.#playbackRate < 0 ? Direction.BACKWARD : Direction.FORWARD; + if (isFrameBeyondTime(decodedFrame, direction, currentTimeInMicros)) { + decodedFrame.close(); + // Decode more frames (if we now have more space in the queue) + this.#decodeAudio(); + return; + } + this.#decodedAudioFrames.push(decodedFrame); + if (this.#nextDecodedAudioFramePromise) { + this.#nextDecodedAudioFramePromise.resolve(); + this.#nextDecodedAudioFramePromise = undefined; + } + // Decode more frames (if we now have more space in the queue) + this.#decodeAudio(); + } + + #initializeAudio(sampleRate: number): AudioContext { + this.#audioContext = new AudioContext({ + sampleRate: sampleRate, + latencyHint: "playback" + }); + + this.#volumeGainNode = new GainNode(this.#audioContext); + this.#volumeGainNode.connect(this.#audioContext.destination); + this.#updateVolume(); + + if (this.#isPotentiallyPlaying() && !this.#seeking) { + void this.#audioContext.resume(); + } else { + void this.#audioContext.suspend(); + } + + return this.#audioContext; + } + + #renderAudio() { + const currentTimeInMicros = 1e6 * this.#currentTime; + const direction = + this.#playbackRate < 0 ? Direction.BACKWARD : Direction.FORWARD; + // Drop all frames that are before current time, since we're too late to render them. + for (let i = this.#decodedAudioFrames.length - 1; i >= 0; i--) { + const frame = this.#decodedAudioFrames[i]; + if (isFrameBeyondTime(frame, direction, currentTimeInMicros)) { + frame.close(); + arrayRemoveAt(this.#decodedAudioFrames, i); + } + } + // Don't render audio while playback is stopped. + if (this.#playbackRate === 0) return; + let nextFrameIndex: number = -1; + if (this.#lastScheduledAudioFrameTime >= 0) { + // Render the next frame. + nextFrameIndex = this.#decodedAudioFrames.findIndex((frame) => { + if (direction === Direction.BACKWARD) { + return ( + frame.timestamp + frame.duration === + this.#lastScheduledAudioFrameTime + ); + } else { + return frame.timestamp === this.#lastScheduledAudioFrameTime; + } + }); + } + if (nextFrameIndex < 0) { + // Render the frame at current time. 
+ nextFrameIndex = this.#decodedAudioFrames.findIndex((frame) => { + return ( + frame.timestamp <= currentTimeInMicros && + currentTimeInMicros < frame.timestamp + frame.duration + ); + }); + } + if (nextFrameIndex < 0) { + // Decode more frames (if we now have more space in the queue) + this.#decodeAudio(); + return; + } + // Collect as many consecutive audio frames as possible + // to schedule in a single batch. + const firstFrame = this.#decodedAudioFrames[nextFrameIndex]; + const frames: AudioData[] = [firstFrame]; + for ( + let frameIndex = nextFrameIndex + 1; + frameIndex < this.#decodedAudioFrames.length; + frameIndex++ + ) { + const frame = this.#decodedAudioFrames[frameIndex]; + const previousFrame = frames[frames.length - 1]; + if ( + isConsecutiveAudioFrame(previousFrame, frame, direction) && + frame.format === firstFrame.format && + frame.numberOfChannels === firstFrame.numberOfChannels && + frame.sampleRate === firstFrame.sampleRate + ) { + // This frame is consecutive with the previous frame. + frames.push(frame); + } else { + // This frame is not consecutive. We can't schedule this in the same batch. + break; + } + } + if (direction === Direction.BACKWARD) { + frames.reverse(); + } + this.#audioContext ??= this.#initializeAudio(firstFrame.sampleRate); + this.#renderAudioFrame(frames, currentTimeInMicros, direction); + // Decode more frames (if we now have more space in the queue) + this.#decodeAudio(); + } + + #renderAudioFrame( + frames: AudioData[], + currentTimeInMicros: number, + direction: Direction + ) { + const firstFrame = frames[0]; + const lastFrame = frames[frames.length - 1]; + let firstTimestamp: number; + let lastTimestamp: number; + if (direction === Direction.BACKWARD) { + firstTimestamp = lastFrame.timestamp + lastFrame.duration; + lastTimestamp = firstFrame.timestamp; + } else { + firstTimestamp = firstFrame.timestamp; + lastTimestamp = lastFrame.timestamp + lastFrame.duration; + } + // Create an AudioBuffer containing all frame data + const { numberOfChannels, sampleRate } = frames[0]; + const audioBuffer = new AudioBuffer({ + numberOfChannels, + length: frames.reduce( + (totalFrames, frame) => totalFrames + frame.numberOfFrames, + 0 + ), + sampleRate + }); + for (let channel = 0; channel < numberOfChannels; channel++) { + const options: AudioDataCopyToOptions = { + format: "f32-planar", + planeIndex: channel + }; + const destination = audioBuffer.getChannelData(channel); + let offset = 0; + for (const frame of frames) { + const size = + frame.allocationSize(options) / Float32Array.BYTES_PER_ELEMENT; + frame.copyTo(destination.subarray(offset, offset + size), options); + offset += size; + } + if (direction === Direction.BACKWARD) { + // For reverse playback, reverse the order of the individual samples. + destination.reverse(); + } + } + // Schedule an AudioBufferSourceNode to play the AudioBuffer + this.#scheduleAudioBuffer( + audioBuffer, + firstTimestamp, + currentTimeInMicros, + this.#playbackRate + ); + this.#lastScheduledAudioFrameTime = lastTimestamp; + // Close the frames, so the audio decoder can reclaim them. 
+ for (let i = frames.length - 1; i >= 0; i--) { + const frame = frames[i]; + frame.close(); + arrayRemove(this.#decodedAudioFrames, frame); + } + } + + #resetAudioDecoder(): void { + for (const frame of this.#decodedAudioFrames) { + frame.close(); + } + for (const audioSourceNode of this.#scheduledAudioSourceNodes) { + audioSourceNode.node.stop(); + } + this.#lastAudioDecoderConfig = undefined; + this.#audioDecoderTimestamp = 0; + this.#furthestDecodedAudioFrame = undefined; + this.#decodingAudioFrames.length = 0; + this.#decodedAudioFrames.length = 0; + this.#scheduledAudioSourceNodes.length = 0; + this.#lastScheduledAudioFrameTime = -1; + this.#audioDecoder.reset(); + } + + #updateVolume(): void { + if (this.#volumeGainNode === undefined) return; + this.#volumeGainNode.gain.value = this.#muted ? 0 : this.#volume; + } + + #updateAudioPlaybackRate() { + // Re-schedule all audio nodes with the new playback rate. + const currentTimeInMicros = 1e6 * this.#currentTime; + const playbackRate = this.#playbackRate; + for (const entry of this.#scheduledAudioSourceNodes.slice()) { + entry.node.stop(); + this.#scheduleAudioBuffer( + entry.node.buffer!, + entry.timestamp, + currentTimeInMicros, + playbackRate + ); + } + } + + #scheduleAudioBuffer( + audioBuffer: AudioBuffer, + timestamp: number, + currentTimeInMicros: number, + playbackRate: number + ): void { + const node = this.#audioContext!.createBufferSource(); + node.buffer = audioBuffer; + node.connect(this.#volumeGainNode!); + + const entry = { node: node, timestamp }; + this.#scheduledAudioSourceNodes.push(entry); + node.addEventListener("ended", () => { + arrayRemove(this.#scheduledAudioSourceNodes, entry); + }); + + const offset = (timestamp - currentTimeInMicros) / (1e6 * playbackRate); + node.playbackRate.value = Math.abs(playbackRate); + if (offset > 0) { + node.start(0, offset); + } else { + node.start(-offset); + } + } + #isPotentiallyPlaying(): boolean { // https://html.spec.whatwg.org/multipage/media.html#potentially-playing return !this.#paused && !this.#hasEndedPlayback() && !this.#isBlocked(); @@ -811,6 +1249,9 @@ export class BabyVideoElement extends HTMLElement { if (!this.#paused) { this.#notifyAboutPlaying(); } + // Decode more frames + this.#decodeVideoFrames(); + this.#decodeAudio(); } // If the new ready state is HAVE_ENOUGH_DATA if (newReadyState === MediaReadyState.HAVE_ENOUGH_DATA) { @@ -880,3 +1321,94 @@ export class BabyVideoElement extends HTMLElement { } customElements.define("baby-video", BabyVideoElement); + +function isFrameBeyondTime( + frame: EncodedChunk | AudioData | VideoFrame, + direction: Direction, + timeInMicros: number +): boolean { + if (direction == Direction.FORWARD) { + return frame.timestamp + frame.duration! <= timeInMicros; + } else { + return frame.timestamp >= timeInMicros; + } +} + +function getFrameTolerance(frame: EncodedChunk | AudioData | VideoFrame) { + return Math.ceil(frame.duration! / 16); +} + +function isFrameTimestampEqual( + left: EncodedChunk, + right: AudioData | VideoFrame +): boolean { + // Due to rounding, there can be a small gap between encoded and decoded frames. 
+ return Math.abs(left.timestamp - right.timestamp) <= getFrameTolerance(left); +} + +function isConsecutiveAudioFrame( + previous: AudioData, + next: AudioData, + direction: Direction +): boolean { + let diff: number; + if (direction === Direction.BACKWARD) { + diff = previous.timestamp - (next.timestamp + next.duration); + } else { + diff = next.timestamp - (previous.timestamp + previous.duration); + } + // Due to rounding, there can be a small gap between consecutive audio frames. + return Math.abs(diff) <= getFrameTolerance(previous); +} + +function cloneEncodedAudioChunk( + original: EncodedAudioChunk, + timestamp: number +): EncodedAudioChunk { + const data = new ArrayBuffer(original.byteLength); + original.copyTo(data); + return new EncodedAudioChunk({ + data, + timestamp, + duration: original.duration ?? undefined, + type: original.type + }); +} + +function cloneAudioData(original: AudioData, timestamp: number): AudioData { + const format = "f32-planar"; + let totalSize = 0; + for ( + let channelIndex = 0; + channelIndex < original.numberOfChannels; + channelIndex++ + ) { + totalSize += + original.allocationSize({ format, planeIndex: channelIndex }) / + Float32Array.BYTES_PER_ELEMENT; + } + const buffer = new Float32Array(totalSize); + let offset = 0; + for ( + let channelIndex = 0; + channelIndex < original.numberOfChannels; + channelIndex++ + ) { + const options: AudioDataCopyToOptions = { + format, + planeIndex: channelIndex + }; + const channelSize = + original.allocationSize(options) / Float32Array.BYTES_PER_ELEMENT; + original.copyTo(buffer.subarray(offset, offset + totalSize), options); + offset += channelSize; + } + return new AudioData({ + data: buffer, + format, + numberOfChannels: original.numberOfChannels, + numberOfFrames: original.numberOfFrames, + sampleRate: original.sampleRate, + timestamp: timestamp + }); +} diff --git a/vite.config.js b/vite.config.js index f2b3c4f..5e9740d 100644 --- a/vite.config.js +++ b/vite.config.js @@ -5,6 +5,6 @@ export default defineConfig({ build: { target: "es2022", minify: false, - sourcemap: true, - }, + sourcemap: true + } });
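
Note on the demo's new segment templating (demo/app.ts): getSegmentForTime() now takes the URL template and the exact per-track segment duration (120 / 30 = 4 s for video, 192512 / 48000 ≈ 4.0107 s for audio), and the %INDEX% placeholder is filled with segmentIndex + 1 because the Akamai segments are numbered from 1. A short worked sketch, assuming streamDuration is well past 12 s as in the demo:

// Video track: segmentDuration = 120 / 30 = 4 s.
// At time = 10 s: segmentIndex = floor(10 / 4) = 2, so %INDEX% becomes 3 and
// the returned segment covers [8, 12).
const segment = getVideoSegmentForTime(10);
console.log(segment?.url);       // .../bbb_30fps_1920x1080_8000k_3.m4v
console.log(segment?.startTime); // 8
console.log(segment?.endTime);   // 12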
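
The direction-aware hasSomeBuffer() helper (src/media-source.ts) replaces the hard-coded containsRange(currentTime, currentTime + 0.1) check so that reverse playback and the final moments of the stream can still reach HAVE_FUTURE_DATA. A small usage sketch, assuming the demo's relative import paths:

import { hasSomeBuffer } from "../src/media-source";
import { TimeRanges } from "../src/time-ranges";

// With [0, 10] buffered and the playhead at 10:
const buffered = new TimeRanges([[0, 10]]);
console.log(hasSomeBuffer(buffered, 10, 60, -1)); // true: reverse playback only needs data just behind the playhead
console.log(hasSomeBuffer(buffered, 10, 60, +1)); // false: forward playback still needs data beyond it
// Near the duration, the forward window is clamped to the stream end:
console.log(hasSomeBuffer(buffered, 9.95, 10, +1)); // true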
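
Note on the new description field in buildAudioConfig() (src/source-buffer.ts): MP4 samples carry raw AAC without ADTS headers, and for that packaging WebCodecs expects the AudioSpecificConfig bytes in AudioDecoderConfig.description, which is what getAudioSpecificConfig() digs out of the mp4a → esds descriptor chain. A minimal sketch of how such a config is consumed; the description bytes and channel/sample-rate values below are placeholders, not read from the stream:

// Mirrors the shape produced by buildAudioConfig() for the demo's "mp4a.40.5" track.
const config: AudioDecoderConfig = {
  codec: "mp4a.40.5",
  numberOfChannels: 2,
  sampleRate: 48000,
  description: new Uint8Array([0x2b, 0x92, 0x08, 0x00]) // placeholder AudioSpecificConfig
};
const { supported } = await AudioDecoder.isConfigSupported(config);
if (supported) {
  const decoder = new AudioDecoder({
    output: (data) => data.close(),
    error: (e) => console.error(e)
  });
  decoder.configure(config);
}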