197 stabilization (#199)
Fixed several issues with the last frame of a video not being read properly. Improved progress bar behaviour and numbering; a usage sketch follows the change list below.

* fix #197

* fix #198

* end_frame behaviour investigated and improved; warning message suppressed in unit tests #198

* bump version to 0.7.1
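For context, a minimal sketch of how the new `progress` flag is used after this change; the file name is a placeholder, not part of the commit:

```python
import pyorc

# Suppress the progress bar shown while the video is scanned on opening;
# "movie.mp4" is a hypothetical input file.
video = pyorc.Video("movie.mp4", start_frame=0, end_frame=100, progress=False)
```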
hcwinsemius authored Dec 13, 2024
1 parent 26dbcb7 commit 03bdef3
Showing 10 changed files with 127 additions and 73 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -65,6 +65,7 @@ instance/
# Sphinx documentation
docs/_build/
docs/_generated
docs/_examples
# PyBuilder
target/

@@ -104,4 +105,3 @@ env/

# PyCharm
.idea/

14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,17 @@
## [0.7.1] - 2024-12-13
### Added
### Changed
Progress bars while reading and writing videos can be configured with the new `progress` flag

### Deprecated
### Removed
### Fixed
Reading the last frame of a video often raised errors; this is now more robust.
Writing with `frames.to_video` had become very slow with the latest video reader; this has been fixed and is now fast.

### Security


## [0.7.0] - 2024-12-10
### Added
`get_piv` now uses several engines; `engine="numba"` is a lot faster
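As an aside on the 0.7.0 entry above, a hedged sketch of selecting a PIV engine; the `video` object and its camera configuration are assumed, and the accessor-style call follows pyorc's xarray registration:

```python
# Assumes `video` is a pyorc.Video with a camera configuration attached.
da = video.get_frames()                  # xarray DataArray of frames
piv = da.frames.get_piv(engine="numba")  # engine keyword per the 0.7.0 note
```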
2 changes: 1 addition & 1 deletion pyorc/__init__.py
@@ -1,6 +1,6 @@
"""pyorc: free and open-source image-based surface velocity and discharge."""

__version__ = "0.7.0"
__version__ = "0.7.1"

from .api import CameraConfig, Frames, Transect, Velocimetry, Video, get_camera_config, load_camera_config # noqa
from .project import * # noqa
66 changes: 39 additions & 27 deletions pyorc/api/frames.py
@@ -547,7 +547,7 @@ def animate(i):
anim = FuncAnimation(f, animate, init_func=init, frames=frames, **anim_kwargs)
anim.save(fn, **video_kwargs)

def to_video(self, fn, video_format=None, fps=None):
def to_video(self, fn, video_format=None, fps=None, progress=True):
"""Write frames to a video file without any layout.
Frames from the input object are written into a video file. The format and frame
@@ -564,20 +564,10 @@ def to_video(self, fn, video_format=None, fps=None):
fps : float, optional
Frames per second for the output video. If not specified, it is estimated
from the time differences in the input frames.
progress : bool, optional
Display a progress bar while writing the video frames. (default: True)
"""
# """Write frames to a video file without any layout.
#
# Parameters
# ----------
# fn : str
# Path to output file
# video_format : cv2.VideoWriter_fourcc, optional
# A VideoWriter preference, default is cv2.VideoWriter_fourcc(*"mp4v")
# fps : float, optional
# Frames per second, if not provided, derived from original video
#
# """
if video_format is None:
# set to a default
video_format = cv2.VideoWriter_fourcc(*"mp4v")
@@ -587,20 +577,42 @@ def to_video(self, fn, video_format=None, fps=None):
h = self._obj.shape[1]
w = self._obj.shape[2]
out = cv2.VideoWriter(fn, video_format, fps, (w, h))
pbar = tqdm(self._obj, position=0, leave=True)
pbar.set_description("Writing frames")
for n, f in enumerate(pbar):
if len(f.shape) == 3:
img = cv2.cvtColor(np.uint8(f.values), cv2.COLOR_RGB2BGR)
else:
img = f.values
if n == 0:
# make a scale between 0 and 255, only with first frame
img_min = img.min(axis=0).min(axis=0)
img_max = img.max(axis=0).max(axis=0)
img = np.uint8(255 * ((img - img_min) / (img_max - img_min)))
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
out.write(img)
with tqdm(total=len(self._obj), position=0, leave=True, disable=not (progress)) as pbar:
pbar.set_description("Writing frames")
first_frame = True
for n_start in range(0, len(self._obj), self._obj.chunksize):
frames_chunk = self._obj.isel(time=slice(n_start, n_start + self._obj.chunksize))
frames_chunk.load() # load in memory only once
for f in frames_chunk:
if len(f.shape) == 3:
img = cv2.cvtColor(np.uint8(f.values), cv2.COLOR_RGB2BGR)
else:
img = f.values
if first_frame:
first_frame = False
# make a scale between 0 and 255, only with first frame
img_min = img.min(axis=0).min(axis=0)
img_max = img.max(axis=0).max(axis=0)
img = np.uint8(255 * ((img - img_min) / (img_max - img_min)))
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

out.write(img)
pbar.update(1)
#
# pbar = tqdm(self._obj, position=0, leave=True)
# pbar.set_description("Writing frames")
# for n, f in enumerate(pbar):
# if len(f.shape) == 3:
# img = cv2.cvtColor(np.uint8(f.values), cv2.COLOR_RGB2BGR)
# else:
# img = f.values
# if n == 0:
# # make a scale between 0 and 255, only with first frame
# img_min = img.min(axis=0).min(axis=0)
# img_max = img.max(axis=0).max(axis=0)
# img = np.uint8(255 * ((img - img_min) / (img_max - img_min)))
# img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# out.write(img)
out.release()

plot = _frames_plot
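The rewritten loop above loads frames one chunk at a time (an `isel` slice over the `time` dimension, with chunk length taken from `self._obj.chunksize`) instead of computing each dask frame separately, which is what restores the writing speed. A minimal usage sketch with hypothetical file names:

```python
import pyorc

video = pyorc.Video("movie.mp4", start_frame=0, end_frame=100)  # placeholder path
da = video.get_frames(method="grayscale")
# to_video lives on the xarray "frames" accessor of the returned DataArray;
# progress=False suppresses the writing bar introduced in this commit.
da.frames.to_video("out.mp4", fps=25.0, progress=False)
```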
34 changes: 25 additions & 9 deletions pyorc/api/video.py
@@ -44,11 +44,13 @@ def __init__(
h_a: Optional[float] = None,
start_frame: Optional[int] = None,
end_frame: Optional[int] = None,
freq: Optional[int] = 1,
freq: int = 1,
chunksize: int = 20,
stabilize: Optional[List[List]] = None,
lazy: bool = True,
rotation: Optional[int] = None,
fps: Optional[float] = None,
progress: bool = True,
):
"""Video class, inheriting parts from cv2.VideoCapture.
@@ -73,6 +75,8 @@ def __init__(
last frame to use in analysis (if not set, last frame available in video will be used)
freq : int, optional
Frequency with which to read frames. Default is 1; if set to e.g. 2, only every 2nd frame will be read.
chunksize : int, optional
Number of frames to read in one pass, defaults to 20
stabilize : list of lists, optional
set of coordinates that together encapsulate the polygon defining the mask, separating land from water.
The mask is used to select a region (on land) for the rigid point search for stabilization. If not set, then no
@@ -86,6 +90,8 @@ def __init__(
fps : float, optional
hard set for frames per second. Use this with utmost caution and only when you are confident that the video
metadata is incorrect.
progress : bool, optional
Display progress bar while reading video. Default is True.
"""
assert isinstance(start_frame, (int, type(None))), 'start_frame must be of type "int"'
@@ -95,6 +101,7 @@ def __init__(
self.ms = None
self.mask = None
self.lazy = lazy
self.progress = progress
self.stabilize = stabilize
if camera_config is not None:
self.camera_config = camera_config
@@ -153,7 +160,7 @@ def __init__(
self.rotation = rotation
# extract times, frame numbers and frames as far as available
time, frame_number, frames = cv.get_time_frames(
cap, start_frame, end_frame, lazy=lazy, rotation=self.rotation, method="bgr", fps=fps
cap, start_frame, end_frame, lazy=lazy, rotation=self.rotation, method="bgr", fps=fps, progress=progress
)
self.frames = frames
# check if end_frame changed
@@ -166,6 +173,7 @@ def __init__(

self.end_frame = end_frame
self.freq = freq
self.chunksize = chunksize
self.time = time
self.frame_number = frame_number
self.start_frame = start_frame
@@ -246,7 +254,7 @@ def end_frame(self, end_frame: Optional[int] = None):
if end_frame is None:
self._end_frame = self.frame_count - 1
else:
self._end_frame = min(self.frame_count - 1, end_frame)
self._end_frame = end_frame

@property
def freq(self):
@@ -257,6 +265,15 @@ def freq(self):
def freq(self, freq=1):
self._freq = freq

@property
def progress(self):
"""Get progress property."""
return self._progress

@progress.setter
def progress(self, progress=True):
self._progress = progress

@property
def stabilize(self):
"""Get stabilization region coordinates."""
@@ -437,7 +454,7 @@ def get_frames(self, method: Optional[Literal["grayscale", "rgb", "hsv", "bgr"]]
), "No camera configuration is set, add it to the video using the .camera_config method"
# camera_config may be altered for the frames object, so copy below
camera_config = copy.deepcopy(self.camera_config)
frames_chunk = 20
# frames_chunk = 20
if self.frames is None:
# a specific method for collecting frames is requested or lazy access is requested.
# get_frame = dask.delayed(self.get_frame, pure=True) # Lazy version of get_frame
@@ -447,15 +464,12 @@ def get_frames(self, method: Optional[Literal["grayscale", "rgb", "hsv", "bgr"]]
# derive video shape
sample = get_frames_chunk(n_start=0, n_end=1, method=method).compute()[0]
data_array = []
for n_start in range(0, len(self.frame_number), frames_chunk):
n_end = np.minimum(n_start + frames_chunk, len(self.frame_number))
for n_start in range(0, len(self.frame_number), self.chunksize):
n_end = np.minimum(n_start + self.chunksize, len(self.frame_number))
frame_chunk = get_frames_chunk(n_start=n_start, n_end=n_end, method=method)
shape = (n_end - n_start, *sample.shape)
data_array.append(da.from_delayed(frame_chunk, dtype=sample.dtype, shape=shape))

# sample = frames[0].compute()
# data_array = [da.from_delayed(frame, dtype=sample.dtype, shape=sample.shape) for frame in frames]
# da_stack = da.stack(data_array, axis=0)
da_stack = da.concatenate(data_array, axis=0)
else:
da_stack = self.frames
@@ -487,6 +501,7 @@ def get_frames(self, method: Optional[Literal["grayscale", "rgb", "hsv", "bgr"]]
"camera_shape": str([len(y), len(x)]),
"camera_config": camera_config.to_json(),
"h_a": json.dumps(self.h_a),
"chunksize": self.chunksize,
}
frames = xr.DataArray(
da_stack,
@@ -545,4 +560,5 @@ def get_ms(self, cap: cv2.VideoCapture, split: Optional[int] = 2):
end_frame=self.end_frame,
split=split,
mask=self.mask,
progress=self.progress,
)
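A sketch of the changed `end_frame` behaviour: the setter no longer clamps the requested value to `frame_count - 1` up front; instead `end_frame` is corrected after the frames have actually been scanned, so requesting more frames than the file contains is safe. File name and counts below are hypothetical:

```python
import pyorc

# Deliberately request beyond the end of the file; after scanning,
# end_frame reflects the last frame that could actually be read.
video = pyorc.Video("movie.mp4", start_frame=0, end_frame=10_000, chunksize=50)
print(video.end_frame)         # corrected to the last readable frame
print(video.frame_number[-1])  # last frame number actually read
```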
27 changes: 16 additions & 11 deletions pyorc/cv.py
@@ -187,7 +187,7 @@ def _get_cam_mtx(height, width, c=2.0, focal_length=None):
return mtx


def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=None, wdw=4):
def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=None, wdw=4, progress=True):
"""Calculate motion smoothing of video frames using Good Features to Track and Lucas-Kanade Optical Flow methods.
This function processes each frame between `start_frame` and `end_frame` to estimate and smooth affine
@@ -212,6 +212,8 @@ def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=No
Optional mask to specify regions of interest within the frame for feature detection.
wdw : int, optional
Window size for smoothing the affine transformations over time. Defaults to 4.
progress : bool, optional
Show progress bar or not. Defaults to True.
Returns
-------
@@ -221,10 +223,9 @@ def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=No
"""
# set end_frame to last if not defined
end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if end_frame is None else end_frame
# make a start transform which does not change the first frame
m = np.eye(3)[0:2]
# m2 = np.eye(3)[0:2]
ms = []
# ms2 = []
m_key = copy.deepcopy(m)
# get start frame and points
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
@@ -233,7 +234,6 @@ def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=No

# Read first frame
_, img_key = cap.read()
_, img_key = cap.read()
# Convert frame to grayscale
img1 = cv2.cvtColor(img_key, cv2.COLOR_BGR2GRAY)
img_key = img1
@@ -245,23 +245,22 @@ def get_ms_gftt(cap, start_frame=0, end_frame=None, n_pts=None, split=2, mask=No
# get features from first key frame
prev_pts = _gftt_split(img_key, split, n_pts, mask=mask)

pbar = tqdm(range(n_frames), position=0, leave=True)
pbar = tqdm(range(n_frames - 1), position=0, leave=True, disable=not (progress))
pbar.set_description("Deriving stabilization parameters from second frame onwards")
for i in pbar:
ms.append(m)
# ms2.append(m2)
_, img2 = cap.read()
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
curr_pts, status, err = cv2.calcOpticalFlowPyrLK(img_key, img2, prev_pts, None)
# curr_pts = curr_pts[status == 1]
# prev_pts = prev_pts[status == 1]
m_part = cv2.estimateAffine2D(curr_pts, prev_pts)[0]
m = _combine_m(m_key, m_part)
if i % 30 == 0:
img_key = img1
prev_pts = _gftt_split(img_key, split, n_pts, mask=mask)
m_key = copy.deepcopy(m)
img1 = img2

# add the very last transformation
ms.append(m)
# smooth the affines over time
ma = np.array(ms)
for m in range(ma.shape[1]):
@@ -917,7 +916,7 @@ def get_frame(cap, rotation=None, ms=None, method="grayscale"):
return ret, img


def get_time_frames(cap, start_frame, end_frame, lazy=True, fps=None, **kwargs):
def get_time_frames(cap, start_frame, end_frame, lazy=True, fps=None, progress=True, **kwargs):
"""Obtain valid time stamps and frame numbers from video capture object.
Valid frames may start and end at start_frame and end_frame, respectively. However, certain required frames may
@@ -935,6 +934,8 @@ def get_time_frames(cap, start_frame, end_frame, lazy=True, fps=None, **kwargs):
read frames lazily (default) or not. Set to False for direct reading (faster, but more memory)
fps : float, optional
hard enforced frames per second number (used when metadata of video is incorrect)
progress : bool, optional
display progress bar. Default is True.
**kwargs : dict, optional
additional keyword arguments passed to get_frame() function
@@ -947,7 +948,11 @@ def get_time_frames(cap, start_frame, end_frame, lazy=True, fps=None, **kwargs):
"""
cap.set(cv2.CAP_PROP_POS_FRAMES, np.float64(start_frame))
pbar = tqdm(
total=end_frame - start_frame + 1, position=0, desc="Scanning video", disable=not (progress), leave=True
)
ret, img = get_frame(cap, **kwargs)
# pbar.update(1)
n = start_frame
time = []
frame_number = []
@@ -962,13 +967,13 @@ def get_time_frames(cap, start_frame, end_frame, lazy=True, fps=None, **kwargs):
frame_number.append(n)
n += 1
ret, img = get_frame(cap, **kwargs) # read frame 1 + ...
pbar.update(1)
if ret == False:
break
t2 = cap.get(cv2.CAP_PROP_POS_MSEC)
if t2 <= 0.0:
# invalid time difference, stop reading.
break

# do a final check if the last frame(s) are readable by direct seek and read. Sometimes this results in not being
# able to r
last_valid_idx = _check_valid_frames(cap, frame_number)
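The scanning loop above reads frames sequentially, stops as soon as a read fails or the reported timestamp becomes invalid (non-positive), and `_check_valid_frames` then verifies the tail frames by direct seek. A hedged sketch of calling it directly; normally this happens inside `Video`, and the path is a placeholder:

```python
import cv2

from pyorc import cv as pyorc_cv

cap = cv2.VideoCapture("movie.mp4")  # hypothetical input file
time, frame_number, frames = pyorc_cv.get_time_frames(
    cap, start_frame=0, end_frame=50, lazy=True, progress=False, method="bgr"
)
cap.release()
```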
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -103,6 +103,8 @@ filterwarnings = [
"ignore:invalid value encountered*:RuntimeWarning", # linestrings issue with plotting transects.
"ignore:Degrees of freedom *:RuntimeWarning", # not fully clear why this appears in user interfacing, test with future updates.
"ignore:numpy.ndarray size changed, may indicate binary incompatibility:RuntimeWarning", # likely caused by incompatibility in used numpy version across libraries. May resolve with future updates.
"ignore:\"openpiv\" is currently the default engine, but it will be replaced by \"numba\" in a future release:DeprecationWarning", # must be removed when openpiv is removed as dependency
"ignore:Memory availability*:UserWarning", # memory availability may be small during test, not problematic
]

[tool.ruff]