Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Video metadata processing no longer writes temp files #293

Open
wants to merge 47 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
2efa849
ClippingSubsampler rewrite and bug fixes
MattUnderscoreZhang Jan 18, 2024
a5c9649
More refactoring of ClippingSubsampler, plus a fix to _get_clip_inter…
MattUnderscoreZhang Jan 18, 2024
2cb5854
Finished refactoring ClippingSubsampler
MattUnderscoreZhang Jan 18, 2024
6106f62
Merge branch 'clipping_subsampler_rewrite' into all_fixes
MattUnderscoreZhang Jan 18, 2024
5d03b72
Final code changes
MattUnderscoreZhang Jan 19, 2024
47c7d64
Added docstrings
MattUnderscoreZhang Jan 19, 2024
5aa84d4
Passed tests and linting
MattUnderscoreZhang Jan 19, 2024
140e1ab
Made type annotations consistent with Python 3.8
MattUnderscoreZhang Jan 19, 2024
077ca27
More annotation fixes
MattUnderscoreZhang Jan 19, 2024
32fa4ea
The Python 3.8 annotation needs a lot of hand-holding, it seems
MattUnderscoreZhang Jan 19, 2024
5a8957f
Pylint has to cut it out, I swear to God
MattUnderscoreZhang Jan 19, 2024
f0f0168
No real change, just relauching unit tests which failed due to connec…
MattUnderscoreZhang Jan 19, 2024
f5d7c85
Merge branch 'main' into clipping_subsampler_refactor
iejMac Jan 19, 2024
388f51a
Merge branch 'main' into clipping_subsampler_refactor
rom1504 Jan 21, 2024
5101379
Merge remote-tracking branch 'origin/main' into clipping_subsampler_r…
MattUnderscoreZhang Jan 22, 2024
1df88dd
Linting issue
MattUnderscoreZhang Jan 22, 2024
226fba3
Another linting issue
MattUnderscoreZhang Jan 22, 2024
8ed5074
Separated per-shard code from code that should only be executed once
MattUnderscoreZhang Jan 24, 2024
e862eaa
Pulled ShardStatus parameters into their own data type
MattUnderscoreZhang Jan 24, 2024
d158106
Cleaned up shard processing error handling
MattUnderscoreZhang Jan 24, 2024
5cd53a9
Cleaned up code
MattUnderscoreZhang Jan 24, 2024
ffe0e71
Bug fixes
MattUnderscoreZhang Jan 24, 2024
2c7daf8
Formatting
MattUnderscoreZhang Jan 24, 2024
ac5a35b
Fixed linting issues
MattUnderscoreZhang Jan 24, 2024
5222f39
Fixing more damn linting
MattUnderscoreZhang Jan 24, 2024
6dc8991
Added a missing docstring
MattUnderscoreZhang Jan 24, 2024
6cbb43f
Unified SubsetWorker and DownloadWorker code
MattUnderscoreZhang Jan 24, 2024
d5f3b19
Bug fixes
MattUnderscoreZhang Jan 24, 2024
efceb33
Merge branch 'main' into download_worker_refactoring
MattUnderscoreZhang Jan 24, 2024
f33ed6c
Linting
MattUnderscoreZhang Jan 24, 2024
fb89ced
Linting again
MattUnderscoreZhang Jan 24, 2024
fca3332
Forgot a docstring
MattUnderscoreZhang Jan 24, 2024
8f94077
Made CutDetectionSubsampler take the same form of inputs and outputs …
MattUnderscoreZhang Jan 25, 2024
d3ab8aa
Removed unnecessary thread operations
MattUnderscoreZhang Jan 25, 2024
d12f2e9
Added save_temp_input_streams function
MattUnderscoreZhang Jan 25, 2024
547b653
Added functions for converting between streams and temp files
MattUnderscoreZhang Jan 25, 2024
d117950
FFProbeSubsampler now does not save temp file
MattUnderscoreZhang Jan 26, 2024
94b2cd6
CutDetectionSubsampler now does not save temp file
MattUnderscoreZhang Jan 26, 2024
81c305f
Separated metadata collection functions
MattUnderscoreZhang Jan 26, 2024
d8d37a1
Code cleanup for clarity
MattUnderscoreZhang Jan 26, 2024
d235c26
More code simplification
MattUnderscoreZhang Jan 26, 2024
99e82cf
Merge branch 'main' into video_metadata_no_io
MattUnderscoreZhang Jan 26, 2024
73fe44a
Fixed bugs
MattUnderscoreZhang Jan 26, 2024
e63b8b9
Unit tests and linting
MattUnderscoreZhang Jan 26, 2024
32ef272
Black formatting
MattUnderscoreZhang Jan 26, 2024
cba413c
Merge branch 'main' into video_metadata_no_io
MattUnderscoreZhang Jan 28, 2024
75f0681
Fixed a typo
MattUnderscoreZhang Jan 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 9 additions & 16 deletions tests/test_subsamplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,14 @@ def test_audio_rate_subsampler(sample_rate, n_audio_channels):
"cut_detection_mode,framerates", [("longest", []), ("longest", [1]), ("all", []), ("all", [1])]
)
def test_cut_detection_subsampler(cut_detection_mode, framerates):
current_folder = os.path.dirname(__file__)
video = os.path.join(current_folder, "test_files/test_video.mp4")
with open(video, "rb") as vid_f:
video_bytes = vid_f.read()

subsampler = CutDetectionSubsampler(cut_detection_mode, framerates, threshold=5)

streams = {"video": [video_bytes]}
streams, cuts, err_msg = subsampler(streams)
current_folder = os.path.dirname(__file__)
video_filepath = os.path.join(current_folder, "test_files/test_video.mp4")
metadata, error_message = subsampler(video_filepath)
assert error_message is None
cuts = metadata["cuts"]

if cut_detection_mode == "longest":
assert len(cuts["cuts_original_fps"]) == 1
assert cuts["cuts_original_fps"][0] == [0, 2096]
Expand Down Expand Up @@ -276,17 +275,11 @@ def test_optical_flow_subsampler(detector, fps, params):

@pytest.mark.parametrize("extract_keyframes", [False, True])
def test_ffprobe_subsampler(extract_keyframes):
current_folder = os.path.dirname(__file__)
# video length - 2:02, 1080x1920, 30 fps
video = os.path.join(current_folder, "test_files/test_video.mp4")
with open(video, "rb") as vid_f:
video_bytes = vid_f.read()

subsampler = FFProbeSubsampler(extract_keyframes)

streams = {"video": [video_bytes]}
metadata = {}
subsampled_streams, metadata, error_message = subsampler(streams, metadata)
current_folder = os.path.dirname(__file__)
video_filepath = os.path.join(current_folder, "test_files/test_video.mp4")
metadata, error_message = subsampler(video_filepath)
assert error_message is None
assert metadata is not None
assert "video_metadata" in metadata
Expand Down
85 changes: 42 additions & 43 deletions video2dataset/subsamplers/cut_detection_subsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"""
import numpy as np
from scenedetect import ContentDetector, SceneManager, open_video
import os
import tempfile
from typing import Tuple, List, Optional, Literal

from .subsampler import Subsampler
from video2dataset.subsamplers.subsampler import Subsampler
from video2dataset.types import Metadata, Error

# TODO: this can be done more elegantly:
# from scenedetect import scene_manager and set that in correct namespace
Expand Down Expand Up @@ -45,53 +45,52 @@ class CutDetectionSubsampler(Subsampler):
- min_scene_len - minimum scene length to not drop a scene (see PySceneDetect docs for more explanation)
"""

def __init__(self, cut_detection_mode="all", framerates=None, threshold=27, min_scene_len=15):
self.framerates = framerates
def __init__(
self,
cut_detection_mode: Literal["all", "longest"] = "all",
framerates: Optional[List[int]] = None,
threshold: int = 27,
min_scene_len: int = 15,
):
self.framerates = framerates if framerates is not None else []
self.cut_detection_mode = cut_detection_mode
self.threshold = threshold
self.min_scene_len = min_scene_len

def __call__(self, streams, metadata=None):
video_bytes = streams["video"][0]

def __call__(self, video_filepath: str, metadata: Optional[Metadata] = None) -> Tuple[Metadata, Error]:
metadata = metadata if metadata is not None else {}
try:
with tempfile.TemporaryDirectory() as tmpdir:
video_path = os.path.join(tmpdir, "input.mp4")
with open(video_path, "wb") as f:
f.write(video_bytes)

video = open_video(video_path)
# find scene changes
video = open_video(video_filepath)
detector = ContentDetector(threshold=self.threshold, min_scene_len=self.min_scene_len)
scene_manager = SceneManager()
scene_manager.add_detector(detector)
scene_manager.auto_downscale = False
scene_manager.downscale = video.frame_size[0] // DEFAULT_MIN_WIDTH
scene_manager.detect_scenes(video=video)

# extract cuts in both original fps and target fps
cuts = {}
original_fps = video.frame_rate
cuts["original_fps"] = original_fps
cuts["cuts_original_fps"] = get_scenes_from_scene_manager(scene_manager, self.cut_detection_mode)
for target_fps in self.framerates:
video.reset()

detector = ContentDetector(threshold=self.threshold, min_scene_len=self.min_scene_len)
scene_manager = SceneManager()
scene_manager.add_detector(detector)
scene_manager.auto_downscale = False
scene_manager.downscale = video.frame_size[0] // DEFAULT_MIN_WIDTH

cuts = {}
original_fps = video.frame_rate
cuts["original_fps"] = original_fps

scene_manager.detect_scenes(video=video)
cuts["cuts_original_fps"] = get_scenes_from_scene_manager(scene_manager, self.cut_detection_mode)
if self.framerates is not None:
for target_fps in self.framerates:
video.reset()

detector = ContentDetector(threshold=self.threshold, min_scene_len=self.min_scene_len)
scene_manager = SceneManager()
scene_manager.add_detector(detector)
frame_skip = max(
int(original_fps // target_fps) - 1, 0
) # if we take 1 frame and skip N frames we're sampling 1/N+1 % of the video
# so if we desire to sample 1/N of the video, we need to subtract one when doing frame skipping

scene_manager.detect_scenes(video=video, frame_skip=frame_skip)
cuts[f"cuts_{target_fps}"] = get_scenes_from_scene_manager(
scene_manager, self.cut_detection_mode
)
scene_manager.clear()
except Exception as err: # pylint: disable=broad-except
return {}, None, str(err)
frame_skip = max(
int(original_fps // target_fps) - 1, 0
) # if we take 1 frame and skip N frames we're sampling 1/N+1 % of the video
# so if we desire to sample 1/N of the video, we need to subtract one when doing frame skipping

return streams, cuts, None
scene_manager.detect_scenes(video=video, frame_skip=frame_skip)
cuts[f"cuts_{target_fps}"] = get_scenes_from_scene_manager(scene_manager, self.cut_detection_mode)
scene_manager.clear()

# save and return metadata
metadata["cuts"] = cuts
except Exception as err: # pylint: disable=broad-except
return metadata, str(err)
return metadata, None
77 changes: 37 additions & 40 deletions video2dataset/subsamplers/ffprobe_subsampler.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""extracts basic video compression metadata."""
import os
import json
import subprocess
import tempfile
from typing import Tuple, Optional

from .subsampler import Subsampler
from video2dataset.subsamplers.subsampler import Subsampler
from video2dataset.types import Metadata, Error


# TODO: figure out why this is so slow (12 samples/s)
Expand All @@ -18,41 +18,38 @@ class FFProbeSubsampler(Subsampler):
def __init__(self, extract_keyframes=False):
self.extract_keyframes = extract_keyframes

def __call__(self, streams, metadata):
# TODO: this should also work for audio (maybe others)
video_bytes = streams["video"][0]
with tempfile.TemporaryDirectory() as tmpdir:
with open(os.path.join(tmpdir, "input.mp4"), "wb") as f:
f.write(video_bytes)
try:
command = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
f"{tmpdir}/input.mp4",
def __call__(self, video_filepath: str, metadata: Optional[Metadata] = None) -> Tuple[Metadata, Error]:
metadata = metadata if metadata is not None else {}
try:
# extract video metadata
command = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
f"{video_filepath}",
]
if self.extract_keyframes:
command.extend(["-select_streams", "v:0", "-show_entries", "packet=pts_time,flags"])
process = subprocess.run(command, capture_output=True, text=True, check=True)
video_metadata = json.loads(process.stdout)

# extract keyframe timestamps if requested
if self.extract_keyframes:
keyframe_timestamps = [
float(packet["pts_time"]) for packet in video_metadata["packets"] if "K" in packet.get("flags", "")
]

if self.extract_keyframes:
command.extend(["-select_streams", "v:0", "-show_entries", "packet=pts_time,flags"])

process = subprocess.run(command, capture_output=True, text=True, check=True)
video_metadata = json.loads(process.stdout)

if self.extract_keyframes:
keyframe_info = [entry for entry in video_metadata["packets"] if "K" in entry.get("flags", "")]
keyframe_timestamps = [float(entry["pts_time"]) for entry in keyframe_info]
if "duration" in video_metadata["format"]:
duration = float(video_metadata["format"]["duration"])
keyframe_timestamps.append(duration)
video_metadata["keyframe_timestamps"] = keyframe_timestamps
video_metadata.pop("packets") # Don't need it anymore
metadata["video_metadata"] = video_metadata

except Exception as err: # pylint: disable=broad-except
return streams, metadata, str(err)

return streams, metadata, None
if "duration" in video_metadata["format"]:
duration = float(video_metadata["format"]["duration"])
keyframe_timestamps.append(duration)
video_metadata["keyframe_timestamps"] = keyframe_timestamps
video_metadata.pop("packets") # Don't need it anymore

# save and return metadata
metadata["video_metadata"] = video_metadata
except Exception as err: # pylint: disable=broad-except
return metadata, str(err)
return metadata, None
8 changes: 5 additions & 3 deletions video2dataset/subsamplers/noop_subsampler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""No operation subsampler"""
from typing import List, Tuple

from .subsampler import Subsampler
from video2dataset.subsamplers.subsampler import Subsampler
from video2dataset.types import Metadata, Error, TempFilepaths


class NoOpSubsampler(Subsampler):
def __init__(self):
pass

def __call__(self, streams, metadata):
return streams, [metadata], None
def __call__(self, filepaths: TempFilepaths, metadata: Metadata) -> Tuple[TempFilepaths, List[Metadata], Error]:
return filepaths, [metadata], None
11 changes: 10 additions & 1 deletion video2dataset/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Type definitions for video2dataset."""
from typing import List, TypedDict
from typing import List, TypedDict, Optional


class EncodeFormats(TypedDict, total=False):
Expand All @@ -14,3 +14,12 @@ class Streams(TypedDict, total=False):

# TODO: make more structured
Metadata = dict


Error = Optional[str]


# TODO: remove after refactoring is complete
class TempFilepaths(TypedDict, total=False):
video: List[str]
audio: List[str]
Loading
Loading