From ee3c82553399e65c384b52ed0e1f9599cfd3248a Mon Sep 17 00:00:00 2001
From: Matt Whiteway
Date: Mon, 9 Dec 2024 19:13:56 +0000
Subject: [PATCH] start pipeline construction

---
 README.md                        |   2 +-
 configs/config_mirror-mouse.yaml | 208 +++++++++++++++
 configs/pipeline.yaml            |  50 +++-
 lp3d_analysis/io.py              |  17 ++
 lp3d_analysis/train.py           | 439 +++++++++++++++++++++++++++++++
 pipelines/pipeline_simple.py     | 258 ++++++++++++++++++
 6 files changed, 972 insertions(+), 2 deletions(-)
 create mode 100644 configs/config_mirror-mouse.yaml
 create mode 100644 lp3d_analysis/io.py
 create mode 100644 lp3d_analysis/train.py
 create mode 100644 pipelines/pipeline_simple.py

diff --git a/README.md b/README.md
index c7414f8..0f32c49 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ sudo apt install ffmpeg
 NOTE: do *not* do this if you are setting up this repo in a Lightning Studio.
 
 ```
-conda create --yes --name labeler python=3.8
+conda create --yes --name labeler python=3.10
 conda activate labeler
 ```
 
diff --git a/configs/config_mirror-mouse.yaml b/configs/config_mirror-mouse.yaml
new file mode 100644
index 0000000..e201655
--- /dev/null
+++ b/configs/config_mirror-mouse.yaml
@@ -0,0 +1,208 @@
+data:
+  # dimensions of training images
+  image_orig_dims:
+    height: 406
+    width: 396
+  # resize dimensions to streamline model creation
+  image_resize_dims:
+    height: 256
+    width: 256
+  # ABSOLUTE path to data directory
+  data_dir: /teamspace/studios/this_studio/data/mirror-mouse
+  # ABSOLUTE path to unlabeled videos' directory; the value below is just a toy example
+  # for running the Getting Started scripts
+  video_dir: videos_test
+
+  # location of labels; for the example script, this should be relative to `data_dir`
+  csv_file: CollectedData.csv
+  # factor by which to downsample heatmaps - 2 | 3
+  downsample_factor: 2
+  # total number of keypoints
+  num_keypoints: 14
+  # keypoint names
+  keypoint_names:
+    - paw1LH_top
+    - paw2LF_top
+    - paw3RF_top
+    - paw4RH_top
+    - tailBase_top
+    - tailMid_top
+    - nose_top
+    # - obs_top
+    - paw1LH_bot
+    - paw2LF_bot
+    - paw3RF_bot
+    - paw4RH_bot
+    - tailBase_bot
+    - tailMid_bot
+    - nose_bot
+    # - obsHigh_bot
+    # - obsLow_bot
+
+  # for mirrored setups with all keypoints defined in the same csv file, define matching
+  # columns for different keypoints (assumes x-y-x-y interleaving)
+  # each list corresponds to a single view, so in the example below there are 2 views
+  # keypoint 0 is from view 0 and matches up with keypoint 7 from view 1
+  # columns that correspond to keypoints only labeled in a single view are omitted
+  # TODO: what if a keypoint is labeled in more than 1 but not all views?
+  # this info is only used for the multiview pca loss
+
+  # mirrored_column_matches:
+  #   - [0, 1, 2, 3, 4, 5, 6]
+  #   - [8, 9, 10, 11, 12, 13, 14]
+  # columns_for_singleview_pca: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14]
+
+  mirrored_column_matches:
+    - [0, 1, 2, 3, 4, 5, 6]
+    - [7, 8, 9, 10, 11, 12, 13]
+
+  columns_for_singleview_pca: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
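+
+  # illustration of the active mapping above (comment only): keypoint 0
+  # (paw1LH_top) in view 0 pairs with keypoint 7 (paw1LH_bot) in view 1;
+  # with x-y interleaving, keypoint k occupies csv columns 2k and 2k+1,
+  # so this pair occupies csv columns (0, 1) and (14, 15)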
+
+training:
+  # select from one of several predefined image/video augmentation pipelines
+  # default: resizing only
+  # dlc: imgaug pipeline implemented in the DLC 2.0 package
+  # dlc-top-down: dlc augmentations plus vertical and horizontal flips
+  imgaug: dlc
+  # batch size of labeled data during training
+  train_batch_size: 10  # original value: 8
+  # batch size of labeled data during validation
+  val_batch_size: 48
+  # batch size of labeled data during test
+  test_batch_size: 48
+  # fraction of labeled data used for training
+  train_prob: 0.8
+  # fraction of labeled data used for validation (remaining used for test)
+  val_prob: 0.1
+  # <=1 - fraction of total train frames (determined by `train_prob`) used for training
+  # >1 - number of total train frames used for training
+  train_frames: 100
+  # number of gpus to train a single model
+  num_gpus: 1
+  # number of cpu workers for data loaders
+  num_workers: 4
+  # epochs over which to assess validation metrics for early stopping
+  early_stop_patience: 3
+  # epoch at which backbone network weights begin updating
+  unfreezing_epoch: 0  # original value: 20
+  # min/max training epochs; training may exit before max due to early stopping
+  min_epochs: 100
+  max_epochs: 100
+  # frequency to log training metrics (one step is one batch)
+  log_every_n_steps: 10
+  # frequency to log validation metrics
+  check_val_every_n_epoch: 5
+  # select gpu for training
+  gpu_id: 0
+  # rng seed for labeled batches
+  rng_seed_data_pt: 0
+  # rng seed for weight initialization
+  rng_seed_model_pt: 0
+  # learning rate scheduler
+  # multisteplr | [todo - reducelronplateau]
+  lr_scheduler: multisteplr
+  lr_scheduler_params:
+    multisteplr:
+      milestones: [150, 200, 250]
+      gamma: 0.5
+
+model:
+  # list of unsupervised losses
+  # "pca_singleview" | "pca_multiview" | "temporal" | "unimodal_mse" | "unimodal_kl"
+  losses_to_use: []
+  # backbone network:
+  # resnet18 | resnet34 | resnet50 | resnet101 | resnet152 | resnet50_contrastive
+  # resnet50_animalpose_apose | resnet50_animal_ap10k
+  # resnet50_human_jhmdb | resnet50_human_res_rle | resnet50_human_top_res
+  # efficientnet_b0 | efficientnet_b1 | efficientnet_b2
+  backbone: resnet50_animal_ap10k
+  # prediction mode: regression | heatmap | heatmap_mhcrnn (context)
+  model_type: heatmap
+  # which heatmap loss to use
+  # "mse" | "kl" | "js"
+  heatmap_loss_type: mse
+  # experiment name (used by the logger)
+  model_name: test
+
+dali:
+  general:
+    seed: 123456
+  base:
+    train:
+      sequence_length: 32  # 4  # step = sequence_length by default; done internally
+    predict:
+      # (train_batch_size + base.train.sequence_length) * 2 -> round down to nearest pow of 2
+      sequence_length: 128  # step = sequence_length by default; done internally
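+  # note: the `context` settings below only apply to context models
+  # (model_type: heatmap_mhcrnn); `base` covers all other model types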
+  context:
+    train:  # defaults: sequence_length=5, step=sequence_length
+      batch_size: 16
+    predict:  # defaults: sequence_length=5, step=1
+      # (train_batch_size / 4 + context.train.batch_size) * 2 -> round down to nearest pow of 2
+      sequence_length: 96
+
+losses:
+  # loss = projection onto the discarded eigenvectors
+  pca_multiview:
+    # weight in front of PCA loss
+    log_weight: 5.0
+    # predictions should lie within the low-d subspace spanned by these components
+    components_to_keep: 3
+    # absolute error (in pixels) below which pca loss is zeroed out; if null, an empirical
+    # epsilon is computed using the labeled data
+    epsilon: null
+  # loss = projection onto the discarded eigenvectors
+  pca_singleview:
+    # weight in front of PCA loss
+    log_weight: 5.0
+    # predictions should lie within the low-d subspace spanned by components that describe
+    # this fraction of the variance
+    components_to_keep: 0.99
+    # absolute error (in pixels) below which pca loss is zeroed out; if null, an empirical
+    # epsilon is computed using the labeled data
+    epsilon: null
+  # loss = norm of distance between successive timepoints
+  temporal:
+    # weight in front of temporal loss
+    log_weight: 5.0
+    # for epsilon-insensitive rectification
+    # (in pixels; diffs below this are not penalized)
+    epsilon: 20.0
+    # nan removal value
+    # (in probability; frames whose heatmap max is below this value are removed)
+    prob_threshold: 0.05
+
+eval:
+  # paths to the hydra config files in the output folder, OR absolute paths to such folders
+  hydra_paths: [" "]
+  # run prediction on videos after training?
+  predict_vids_after_training: true
+  # save labeled .mp4s after training?
+  save_vids_after_training: false
+  fiftyone:
+    # name of the dataset (Mongo DB) created by FiftyOne; for video datasets, "_video" is
+    # appended to dataset_name
+    dataset_name: mirror-mouse
+    # manually provide a different model name to be displayed in FiftyOne
+    model_display_names: ["test_model"]
+    # whether to launch the app from the script (true), or from ipython (for finer control
+    # over the outputs)
+    launch_app_from_script: false
+
+    remote: true  # for LAI, must be false
+    address: 127.0.0.1  # ip to launch the app on
+    port: 5151  # port to launch the app on
+
+  # absolute path to a directory containing videos for prediction
+  test_videos_directory: null
+  # absolute path to a directory in which to save .csv files with predictions
+  saved_vid_preds_dir: null
+  # confidence threshold for plotting a vid
+  confidence_thresh_for_vid: 0.90
+  # absolute path to the video file to plot with keypoints
+  video_file_to_plot: null
+  # list of strings, each pointing to a .csv file with predictions from a given model on the
+  # same video; combined with video_file_to_plot to make a visualization
+  pred_csv_files_to_plot: [" "]
+
+callbacks:
+  anneal_weight:
+    attr_name: total_unsupervised_importance
+    init_val: 0.0
+    increase_factor: 0.01
+    final_val: 1.0
+    freeze_until_epoch: 0
\ No newline at end of file
diff --git a/configs/pipeline.yaml b/configs/pipeline.yaml
index 5df65b0..df2e255 100644
--- a/configs/pipeline.yaml
+++ b/configs/pipeline.yaml
@@ -1 +1,49 @@
-train_models: true
\ No newline at end of file
+# absolute path to lp yaml file
+lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_mirror-mouse.yaml
+# lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_crim13.yaml
+
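+# this file and the lightning pose config above are loaded together by
+# lp3d_analysis.io.load_cfgs; settings here control the pipeline stages, while
+# the lightning pose config controls how individual models are trained
+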
+# [needed?] pipeline seed for initial data split
+pipeline_seeds: 0
+
+# initial training of an ensemble of networks
+train_networks:
+  # run this section?
+  run: True
+  # overwrite previous results?
+  overwrite: False
+  # pose estimation data type
+  # data_type: lp
+  # ensemble seeds
+  ensemble_seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+  # number of ground truth labels for training
+  n_hand_labels: 100
+  # model types
+  model_types:
+    - supervised
+    - context
+  # training parameters
+  min_steps: 5000
+  max_steps: 5000
+  milestone_steps: [2000, 3000, 4000]
+  val_check_interval: 50
+  train_check_interval: 10
+
+# post-processing options
+post_processing:
+  eks_singleview:
+    run: False
+    overwrite: False
+  eks_multiview:
+    run: False
+    overwrite: False
+
+# visualization options
+visualization:
+  run: False
+  overwrite: False
+  ens_var_plots: False
diff --git a/lp3d_analysis/io.py b/lp3d_analysis/io.py
new file mode 100644
index 0000000..1868ef6
--- /dev/null
+++ b/lp3d_analysis/io.py
@@ -0,0 +1,17 @@
+from typing import Tuple
+
+import yaml
+from omegaconf import DictConfig
+
+
+def load_cfgs(config_file: str) -> Tuple[DictConfig, DictConfig]:
+    """Load the pipeline config and the lightning pose config it points to."""
+    # Load pipeline config file
+    with open(config_file, "r") as file:
+        cfg_pipe = yaml.safe_load(file)
+    cfg_pipe = DictConfig(cfg_pipe)
+
+    # Load lightning pose config file from the path specified in the pipeline config
+    lightning_pose_config_path = cfg_pipe.get("lightning_pose_config")
+    with open(lightning_pose_config_path, "r") as file:
+        lightning_pose_cfg = yaml.safe_load(file)
+
+    cfg_lp = DictConfig(lightning_pose_cfg)
+    return cfg_pipe, cfg_lp
diff --git a/lp3d_analysis/train.py b/lp3d_analysis/train.py
new file mode 100644
index 0000000..01d464e
--- /dev/null
+++ b/lp3d_analysis/train.py
@@ -0,0 +1,439 @@
+"""Functions for training."""
+
+import gc
+import glob
+import os
+import random
+import shutil
+from typing import List, Optional, Tuple
+
+import lightning.pytorch as pl
+import numpy as np
+import pandas as pd
+import torch
+from lightning_pose.callbacks import AnnealWeight  # needed by get_callbacks below
+from lightning_pose.utils import pretty_print_cfg, pretty_print_str
+from lightning_pose.utils.io import (check_video_paths,
+                                     return_absolute_data_paths,
+                                     return_absolute_path)
+from lightning_pose.utils.predictions import (predict_dataset,
+                                              predict_single_video)
+from lightning_pose.utils.scripts import (  # get_callbacks,
+    calculate_train_batches, compute_metrics, get_data_module, get_dataset,
+    get_imgaug_transform, get_loss_factories, get_model)
+from moviepy.editor import VideoFileClip
+from omegaconf import DictConfig, OmegaConf
+from typeguard import typechecked
+
+
+@typechecked
+def get_callbacks(
+    cfg: DictConfig,
+    early_stopping=True,
+    lr_monitor=True,
+    ckpt_model=True,
+    backbone_unfreeze=True,
+) -> List:
+
+    callbacks = []
+
+    if early_stopping:
+        early_stopping_cb = pl.callbacks.EarlyStopping(
+            monitor="val_supervised_loss",
+            patience=cfg.training.early_stop_patience,
+            mode="min",
+        )
+        callbacks.append(early_stopping_cb)
+
+    if lr_monitor:
+        lr_monitor_cb = pl.callbacks.LearningRateMonitor(logging_interval="epoch")
+        callbacks.append(lr_monitor_cb)
+
+    if ckpt_model:
+        ckpt_callback = pl.callbacks.model_checkpoint.ModelCheckpoint(
+            monitor="val_supervised_loss",
+            mode="min",
+            save_top_k=1,
+            filename="best-checkpoint-step={step}-val_loss={val_supervised_loss:.2f}",
+            save_last=True,
+        )
+        callbacks.append(ckpt_callback)
+
+    if backbone_unfreeze:
+        transfer_unfreeze_callback = pl.callbacks.BackboneFinetuning(
+            unfreeze_backbone_at_epoch=cfg.training.unfreezing_epoch,
+            backbone_initial_ratio_lr=0.1,
+            should_align=True,
+            train_bn=True,
+        )
+        callbacks.append(transfer_unfreeze_callback)
+
+    # we just need this callback for unsupervised models
+    if cfg.model.losses_to_use is not None and len(cfg.model.losses_to_use) > 0:
+        anneal_weight_callback = AnnealWeight(**cfg.callbacks.anneal_weight)
+        callbacks.append(anneal_weight_callback)
+
+    return callbacks
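+
+
+# note on checkpoint naming (illustrative): with the ModelCheckpoint template above,
+# the best checkpoint for a model is saved to a path like
+#   tb_logs/<model_name>/version_0/checkpoints/best-checkpoint-step=5000-val_loss=3.21.ckpt
+# which is what the "best-checkpoint-*.ckpt" glob in train_and_infer() below relies
+# on when deciding whether training can be skipped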
+
+
+def train(
+    cfg: DictConfig,
+    results_dir: str,
+    min_steps: int,
+    max_steps: int,
+    milestone_steps: List[int],
+    val_check_interval: int,
+    n_train_frames: Optional[int] = None,
+) -> Tuple[str, pl.LightningDataModule, pl.Trainer]:
+    """Train a model and return the best checkpoint path, data module, and trainer."""
+
+    MIN_STEPS = min_steps
+    MAX_STEPS = max_steps
+    MILESTONE_STEPS = milestone_steps
+    # override the config value; None means use the full training split
+    cfg.training.train_frames = n_train_frames
+
+    # mimic hydra, change dir into results dir
+    pwd = os.getcwd()
+    os.makedirs(results_dir, exist_ok=True)
+    os.chdir(results_dir)
+
+    # reset all seeds
+    seed = 0
+    os.environ["PYTHONHASHSEED"] = str(seed)
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+    # ----------------------------------------------------------------------------------
+    # set up data/model objects
+    # ----------------------------------------------------------------------------------
+
+    pretty_print_cfg(cfg)
+
+    data_dir, video_dir = return_absolute_data_paths(data_cfg=cfg.data)
+
+    # imgaug transform
+    imgaug_transform = get_imgaug_transform(cfg=cfg)
+
+    # dataset
+    dataset = get_dataset(cfg=cfg, data_dir=data_dir, imgaug_transform=imgaug_transform)
+
+    # datamodule; breaks up dataset into train/val/test
+    data_module = get_data_module(cfg=cfg, dataset=dataset, video_dir=video_dir)
+    data_module.setup()
+
+    num_train_frames = len(data_module.train_dataset)
+    if n_train_frames:
+        num_train_frames = n_train_frames
+
+    # convert step-based budgets into the epoch-based params lightning expects
+    step_per_epoch = num_train_frames / cfg.training.train_batch_size
+    print(f'step_per_epoch={step_per_epoch}')
+
+    cfg.training.max_epochs = int(MAX_STEPS / step_per_epoch)
+    cfg.training.min_epochs = int(MIN_STEPS / step_per_epoch)
+    cfg.training.lr_scheduler_params.multisteplr.milestones = \
+        [int(s / step_per_epoch) for s in MILESTONE_STEPS]
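+
+    # worked example with the configs in this repo: n_train_frames=100 and
+    # train_batch_size=10 give step_per_epoch=10, so min/max_steps=5000 become
+    # min/max_epochs=500 and milestone_steps=[2000, 3000, 4000] become
+    # milestones=[200, 300, 400]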
+
+    # build loss factory which orchestrates different losses
+    loss_factories = get_loss_factories(cfg=cfg, data_module=data_module)
+
+    # model
+    model = get_model(cfg=cfg, data_module=data_module, loss_factories=loss_factories)
+
+    # ----------------------------------------------------------------------------------
+    # set up and run training
+    # ----------------------------------------------------------------------------------
+
+    # logger
+    logger = pl.loggers.TensorBoardLogger("tb_logs", name=cfg.model.model_name)
+
+    # early stopping is disabled; validation is checked on a step interval instead
+    callbacks = get_callbacks(cfg, early_stopping=False)
+
+    # calculate number of batches for both labeled and unlabeled data per epoch
+    limit_train_batches = calculate_train_batches(cfg, dataset)
+
+    # set up trainer
+    trainer = pl.Trainer(
+        accelerator="gpu",
+        devices=1,
+        max_epochs=cfg.training.max_epochs,
+        min_epochs=cfg.training.min_epochs,
+        check_val_every_n_epoch=None,
+        val_check_interval=val_check_interval,
+        log_every_n_steps=cfg.training.log_every_n_steps,
+        callbacks=callbacks,
+        logger=logger,
+        limit_train_batches=limit_train_batches,
+    )
+
+    # train model!
+    trainer.fit(model=model, datamodule=data_module)
+
+    # save config file (results_dir was created above)
+    cfg_file_local = os.path.join(results_dir, "config.yaml")
+    OmegaConf.save(config=cfg, f=cfg_file_local)
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: labeled frames
+    # ----------------------------------------------------------------------------------
+    hydra_output_directory = os.getcwd()
+    print("Hydra output directory: {}".format(hydra_output_directory))
+
+    # get best ckpt
+    best_ckpt = os.path.abspath(trainer.checkpoint_callback.best_model_path)
+
+    # check if best_ckpt is a file
+    if not os.path.isfile(best_ckpt):
+        raise FileNotFoundError("Cannot find checkpoint. Have you trained for too few epochs?")
+
+    # make unaugmented data_loader if necessary
+    if cfg.training.imgaug != "default":
+        cfg_pred = cfg.copy()
+        cfg_pred.training.imgaug = "default"
+        imgaug_transform_pred = get_imgaug_transform(cfg=cfg_pred)
+        dataset_pred = get_dataset(
+            cfg=cfg_pred, data_dir=data_dir, imgaug_transform=imgaug_transform_pred)
+        data_module_pred = get_data_module(
+            cfg=cfg_pred, dataset=dataset_pred, video_dir=video_dir)
+        data_module_pred.setup()
+    else:
+        data_module_pred = data_module
+
+    # predict on all labeled frames (train/val/test)
+    pretty_print_str("Predicting train/val/test images...")
+    # compute and save frame-wise predictions
+    preds_file = os.path.join(hydra_output_directory, "predictions.csv")
+    predict_dataset(
+        cfg=cfg,
+        trainer=trainer,
+        model=model,
+        data_module=data_module_pred,
+        ckpt_file=best_ckpt,
+        preds_file=preds_file,
+    )
+    # compute and save various metrics
+    try:
+        compute_metrics(cfg=cfg, preds_file=preds_file, data_module=data_module_pred)
+    except Exception as e:
+        print(f"Error computing metrics\n{e}")
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: predict on OOD labeled frames
+    # ----------------------------------------------------------------------------------
+    # update config file to point to OOD data
+    csv_file_ood = os.path.join(cfg.data.data_dir, "CollectedData_new.csv")
+    if os.path.exists(csv_file_ood):
+        cfg_ood = cfg.copy()
+        cfg_ood.data.csv_file = csv_file_ood
+        cfg_ood.training.imgaug = "default"
+        cfg_ood.training.train_prob = 1
+        cfg_ood.training.val_prob = 0
+        cfg_ood.training.train_frames = 1
+        # build dataset/datamodule
+        imgaug_transform_ood = get_imgaug_transform(cfg=cfg_ood)
+        dataset_ood = get_dataset(
+            cfg=cfg_ood, data_dir=data_dir, imgaug_transform=imgaug_transform_ood
+        )
+        data_module_ood = get_data_module(cfg=cfg_ood, dataset=dataset_ood, video_dir=video_dir)
+        data_module_ood.setup()
+        pretty_print_str("Predicting OOD images...")
+        # compute and save frame-wise predictions
+        preds_file_ood = os.path.join(hydra_output_directory, "predictions_new.csv")
+        predict_dataset(
+            cfg=cfg_ood,
+            trainer=trainer,
+            model=model,
+            data_module=data_module_ood,
+            ckpt_file=best_ckpt,
+            preds_file=preds_file_ood,
+        )
+        # compute and save various metrics
+        try:
+            compute_metrics(
+                cfg=cfg_ood, preds_file=preds_file_ood, data_module=data_module_ood
+            )
+        except Exception as e:
+            print(f"Error computing metrics\n{e}")
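+
+    # note: the OOD block above is driven entirely by file naming: labels in
+    # <data_dir>/CollectedData_new.csv produce predictions_new.csv alongside
+    # predictions.csv; if that csv is absent the block is skipped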
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: unlabeled videos
+    # ----------------------------------------------------------------------------------
+    if cfg.eval.predict_vids_after_training:
+        pretty_print_str("Predicting videos...")
+        if cfg.eval.test_videos_directory is None:
+            filenames = []
+        else:
+            filenames = check_video_paths(return_absolute_path(cfg.eval.test_videos_directory))
+            pretty_print_str(
+                f"Found {len(filenames)} videos to predict on "
+                f"(in cfg.eval.test_videos_directory)"
+            )
+
+        for video_file in filenames:
+            assert os.path.isfile(video_file)
+            pretty_print_str(f"Predicting video: {video_file}...")
+            # get save name for prediction csv file
+            video_pred_dir = os.path.join(hydra_output_directory, "video_preds")
+            os.makedirs(video_pred_dir, exist_ok=True)
+            video_pred_name = os.path.splitext(os.path.basename(video_file))[0]
+            prediction_csv_file = os.path.join(video_pred_dir, video_pred_name + ".csv")
+            inference_with_metrics(
+                video_file=video_file,
+                ckpt_file=best_ckpt,
+                cfg=cfg,
+                preds_file=prediction_csv_file,
+                data_module=data_module_pred,
+                trainer=trainer,
+            )
+
+    # ----------------------------------------------------------------------------------
+    # clean up
+    # ----------------------------------------------------------------------------------
+    # remove lightning logs
+    shutil.rmtree(os.path.join(results_dir, "lightning_logs"), ignore_errors=True)
+
+    # change directory back
+    os.chdir(pwd)
+
+    # clean up memory
+    del imgaug_transform
+    del dataset
+    del data_module
+    # del data_module_pred
+    del loss_factories
+    del model
+    # del trainer
+    gc.collect()
+    torch.cuda.empty_cache()
+
+    return best_ckpt, data_module_pred, trainer
+
+
+def inference_with_metrics(
+    video_file: str,
+    cfg: DictConfig,
+    preds_file: str,
+    ckpt_file: Optional[str] = None,
+    data_module: Optional[pl.LightningDataModule] = None,
+    trainer: Optional[pl.Trainer] = None,
+    metrics: bool = True,
+) -> pd.DataFrame:
+
+    # update video size in config
+    video_clip = VideoFileClip(video_file)
+    cfg.data.image_orig_dims.width = video_clip.w
+    cfg.data.image_orig_dims.height = video_clip.h
+
+    # compute predictions if they don't already exist
+    if not os.path.exists(preds_file):
+        preds_df = predict_single_video(
+            video_file=video_file,
+            ckpt_file=ckpt_file,
+            cfg_file=cfg,
+            preds_file=preds_file,
+            data_module=data_module,
+            trainer=trainer,
+        )
+    else:
+        preds_df = pd.read_csv(preds_file, header=[0, 1, 2], index_col=0)
+
+    # compute and save various metrics
+    if metrics:
+        compute_metrics(cfg=cfg, preds_file=preds_file, data_module=data_module)
+
+    video_clip.close()
+    del video_clip
+    gc.collect()
+
+    return preds_df
+
+
+def train_and_infer(
+    cfg_pipe: DictConfig,
+    cfg_lp: DictConfig,
+    rng_seed: int,
+    data_dir: str,
+    results_dir: str,
+    csv_prefix: Optional[str] = None,
+    new_labels_csv: Optional[str] = None,
+    n_train_frames: Optional[int] = None,
+    overwrite: bool = False,
+) -> None:
+
+    # Parse params from config
+    min_steps = cfg_pipe.train_networks.min_steps
+    max_steps = cfg_pipe.train_networks.max_steps
+    milestone_steps = cfg_pipe.train_networks.milestone_steps
+    val_check_interval = cfg_pipe.train_networks.val_check_interval
+    # not yet defined in configs/pipeline.yaml; default to an empty list
+    video_directories = cfg_pipe.train_networks.get("video_directories", [])
+
+    # Update config
+    cfg_lp.data.data_dir = data_dir
+    cfg_lp.training.rng_seed_data_pt = rng_seed
+    cfg_lp.training.rng_seed_model_pt = rng_seed
+    if new_labels_csv is not None:
+        cfg_lp.data.csv_file = new_labels_csv
+
+    # Add iteration-specific fields to the config
+    # (note: train() recomputes min/max epochs from the step counts above)
+    cfg_lp.training.max_epochs = 10
+    cfg_lp.training.min_epochs = 10
+    cfg_lp.training.unfreeze_step = 30
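+
+    # expected layout of results_dir after a completed run, based on the paths used
+    # below and in train():
+    #   config.yaml
+    #   predictions.csv
+    #   tb_logs/<model_name>/version_*/checkpoints/best-checkpoint-*.ckpt
+    #   video_preds/<video_name>.csv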
+
+    # Check if model has already been trained
+    model_config_checked = os.path.join(results_dir, "config.yaml")
+
+    if os.path.exists(model_config_checked) and not overwrite:
+        print(f"config.yaml found for rng{rng_seed}. Skipping training.")
+
+        checkpoint_pattern = os.path.join(
+            results_dir, "tb_logs", "*", "version_*", "checkpoints", "best-checkpoint-*.ckpt")
+        checkpoint_files = glob.glob(checkpoint_pattern)
+
+        if checkpoint_files:
+            best_ckpt = sorted(checkpoint_files)[-1]  # latest best checkpoint
+        else:
+            raise FileNotFoundError(
+                f"config.yaml exists in {results_dir} but no checkpoint was found"
+            )
+
+        data_module = None
+        trainer = None
+
+    else:
+        print(f"No config.yaml found for rng{rng_seed}. Training the model.")
+        best_ckpt, data_module, trainer = train(
+            cfg=cfg_lp.copy(),
+            results_dir=results_dir,
+            min_steps=min_steps,
+            max_steps=max_steps,
+            milestone_steps=milestone_steps,
+            val_check_interval=val_check_interval,
+            n_train_frames=n_train_frames,
+        )
+
+    # Run inference on all InD/OOD videos and compute unsupervised metrics
+    for video_dir in video_directories:
+        video_files = \
+            [f for f in os.listdir(os.path.join(data_dir, video_dir)) if f.endswith('.mp4')]
+        for video_file in video_files:
+            if csv_prefix:
+                inference_csv_name = f'{csv_prefix}_{video_file.replace(".mp4", ".csv")}'
+            else:
+                inference_csv_name = video_file.replace(".mp4", ".csv")
+            inference_csv = os.path.join(results_dir, "video_preds", inference_csv_name)
+
+            if os.path.exists(inference_csv) and not overwrite:
+                print(f"Inference file {inference_csv} already exists. "
+                      f"Skipping inference for {video_file}")
+            else:
+                print(f"Running inference for {video_file}")
+                inference_with_metrics(
+                    video_file=os.path.join(data_dir, video_dir, video_file),
+                    cfg=cfg_lp.copy(),
+                    preds_file=inference_csv,
+                    ckpt_file=best_ckpt,
+                    data_module=data_module,
+                    trainer=trainer,
+                    metrics=False,
+                )
diff --git a/pipelines/pipeline_simple.py b/pipelines/pipeline_simple.py
new file mode 100644
index 0000000..2961374
--- /dev/null
+++ b/pipelines/pipeline_simple.py
@@ -0,0 +1,258 @@
+import argparse
+import os
+
+from lp3d_analysis.io import load_cfgs
+from lp3d_analysis.train import train_and_infer
+
+
+def pipeline(config_file: str):
+
+    # -------------------------------------------
+    # Setup
+    # -------------------------------------------
+
+    # load cfg_pipe (pipeline yaml) and cfg_lp (lp yaml); both are DictConfigs
+    cfg_pipe, cfg_lp = load_cfgs(config_file)
+
+    # Define directories
+    data_dir = cfg_lp.data.data_dir
+    pipeline_script_dir = os.path.dirname(os.path.abspath(__file__))
+    outputs_dir = os.path.join(pipeline_script_dir, f'../../outputs/{os.path.basename(data_dir)}')
+
+# # Build list of video names from the video directory
+# num_videos, video_names = find_video_names(data_dir, cfg["video_directories"])
+# print(f'Found {num_videos} videos: {video_names}.')
+
+    # -------------------------------------------------------------------------------------
+    # Train ensembles
+    # -------------------------------------------------------------------------------------
+
+    if cfg_pipe.train_networks.run:
+
+        for rng_seed in cfg_pipe.train_networks.ensemble_seeds:
+            # TODO: finalize the results directory structure; provisional layout below
+            results_dir = os.path.join(outputs_dir, 'networks', f'rng{rng_seed}')
+            # Main function call
+            train_and_infer(
+                cfg_pipe=cfg_pipe.copy(),
+                cfg_lp=cfg_lp.copy(),
+                rng_seed=rng_seed,
+                data_dir=data_dir,
+                results_dir=results_dir,
+                overwrite=cfg_pipe.train_networks.overwrite,
+            )
+
+# # # # -------------------------------------------------------------------------------------
+# # # # Post-process network outputs to generate potential pseudo labels (chosen in next step)
+# # # #
------------------------------------------------------------------------------------- +# pp_dir = os.path.join( +# outputs_dir, +# 'post-processors', +# f"{cfg['pseudo_labeler']}_rng={cfg['ensemble_seeds'][0]}-{cfg['ensemble_seeds'][-1]}" +# ) +# pseudo_labeler = cfg["pseudo_labeler"] +# # Collect input eks csv paths from video names; skip existing +# eks_csv_paths = collect_missing_eks_csv_paths(video_names, pp_dir) +# print(f'Post-processing the following videos using {pseudo_labeler}: {eks_csv_paths}') +# # ||| Main EKS function call ||| pipeline_eks will also handle ensemble_mean baseline +# if pseudo_labeler == "eks" or pseudo_labeler == "ensemble_mean": +# pipeline_eks( +# input_csv_names=eks_csv_paths, +# input_dir=networks_dir, +# data_type=cfg["data_type"], +# pseudo_labeler=pseudo_labeler, +# cfg_lp=cfg_lp.copy(), +# results_dir=pp_dir +# ) + +# # # ------------------------------------------------------------------------------------- +# # # run inference on OOD snippets (if specified) -- using network models +# # # ------------------------------------------------------------------------------------- +# dataset_name = os.path.basename(data_dir) +# if cfg["ood_snippets"]: +# print(f'Starting OOD snippet analysis for {dataset_name}') +# pipeline_ood_snippets( +# cfg=cfg, +# cfg_lp=cfg_lp, +# data_dir=data_dir, +# networks_dir=networks_dir, +# pp_dir=pp_dir, +# pseudo_labeler=pseudo_labeler +# ) + +# # # ------------------------------------------------------------------------------------- +# # # select frames to add to the dataset +# # # ------------------------------------------------------------------------------------- +# selection_strategy = cfg["selection_strategy"] +# print( +# f'Selecting {cfg["n_pseudo_labels"]} pseudo-labels from {num_videos} ' +# f'{cfg["pseudo_labeler"]} outputs using ({selection_strategy} strategy)' +# ) +# hand_labels = pd.read_csv(subsample_path, header=[0, 1, 2], index_col=0) +# # Process each ensemble seed +# for k in cfg["ensemble_seeds"]: +# # Initialize seed_labels with hand labels for this seed +# seed_labels = hand_labels.copy() +# combined_csv_filename = ( +# f"CollectedData_hand={cfg['n_hand_labels']}" +# f"_pseudo={cfg['n_pseudo_labels']}_k={k}_{selection_strategy}.csv" +# ) +# combined_csv_path = os.path.join(hand_pseudo_combined, combined_csv_filename) + +# # Check if frame selection has already been done +# if os.path.exists(combined_csv_path): +# print( +# f'Selected frames already exist at {combined_csv_path}. ' +# f'Skipping frame selection for rng{k}.' 
+# ) +# seed_labels = pd.read_csv(combined_csv_path, header=[0, 1, 2], index_col=0) + +# else: +# print(f'Selecting pseudo-labels using a {selection_strategy} strategy.') + +# if selection_strategy == 'random': +# seed_labels = select_frames_random( +# cfg=cfg.copy(), +# k=k, +# data_dir=data_dir, +# num_videos=num_videos, +# pp_dir=pp_dir, +# labeled_data_dir=labeled_data_dir, +# seed_labels=seed_labels +# ) + +# elif selection_strategy == 'hand': +# seed_labels = select_frames_hand( +# unsampled_path=unsampled_path, +# n_frames_to_select=cfg['n_pseudo_labels'], +# k=k, +# seed_labels=seed_labels +# ) + +# elif selection_strategy == 'frame_selection': +# frame_selection_path = os.path.join(source_dir, ( +# f"outputs/{os.path.basename(data_dir)}/hand={cfg_lp.training.train_frames}_" +# f"pseudo={cfg['n_pseudo_labels']}/post-quality-frame/" +# )) + +# final_selected_frames_path = select_frames_strategy_pipeline( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# data_dir=data_dir, +# source_dir=source_dir, +# frame_selection_path=frame_selection_path, +# ) + +# seed_labels = process_and_export_frame_selection( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# data_dir=data_dir, +# labeled_data_dir=labeled_data_dir, +# final_selected_frames_path=final_selected_frames_path, +# seed_labels=seed_labels +# ) + +# # Filter out columns where the second level of the header contains 'zscore', 'nll', or 'ensemble_std' +# columns_to_keep = [ +# col for col in seed_labels.columns +# if not any(substring in col[2] for substring in ['zscore', 'nll', 'ensemble_std']) +# ] + +# # Select the columns to keep +# seed_labels_filtered = seed_labels[columns_to_keep] + +# # Export the filtered DataFrame to CSV +# seed_labels_filtered.to_csv(combined_csv_path) +# print( +# f"Saved combined hand labels and pseudo labels for seed {k} to " +# f"{combined_csv_path}" +# ) + +# # Check number of labels for this seed +# expected_total_labels = cfg['n_hand_labels'] + cfg["n_pseudo_labels"] +# if seed_labels.shape[0] != expected_total_labels: +# print( +# f"Warning: Number of labels for seed {k} ({seed_labels.shape[0]}) " +# f"does not match expected count ({expected_total_labels})" +# ) +# else: +# print(f"Label count verified for seed {k}: {seed_labels.shape[0]} labels") + +# # # ------------------------------------------------------------------------------------- +# # # Train models on expanded dataset +# # # ------------------------------------------------------------------------------------- + +# csv_prefix = ( +# f"hand={cfg['n_hand_labels']}_rng={k}_" +# f"pseudo={cfg['n_pseudo_labels']}_" +# f"{cfg['pseudo_labeler']}_{cfg['selection_strategy']}_" +# f"rng={cfg['ensemble_seeds'][0]}-{cfg['ensemble_seeds'][-1]}" +# ) +# results_dir = os.path.join(aeks_dir, f"rng{k}") + +# # Run train_and_infer with the combined hand labels and pseudo labels +# train_and_infer( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# k=k, +# data_dir=data_dir, +# results_dir=results_dir, +# csv_prefix=csv_prefix, +# new_labels_csv=combined_csv_path, # Use the combined CSV file for this seed +# n_train_frames=expected_total_labels +# ) + +# print( +# f"Completed training and inference for seed {k} " +# f"using combined hand labels and pseudo labels" +# ) + +# print("Completed training and inference for all seeds using expanded datasets") + +# # # # ------------------------------------------------------------------------------------- +# # # # Run EKS on expanded dataset inferences +# # # # 
------------------------------------------------------------------------------------- +# pseudo_labeler = 'eks' +# # Collect input csv names from video names; skip existing ones +# eks_csv_paths = collect_missing_eks_csv_paths(video_names, aeks_eks_dir) +# print(f'Post-processing the following videos using {pseudo_labeler}: {eks_csv_paths}') +# # ||| Main second round EKS function call ||| +# pipeline_eks( +# input_csv_names=eks_csv_paths, +# input_dir=aeks_dir, +# data_type=cfg["data_type"], +# pseudo_labeler=pseudo_labeler, +# cfg_lp=cfg_lp.copy(), +# results_dir=results_dir +# ) + +# # # ------------------------------------------------------------------------------------- +# # # run inference on OOD snippets (if specified) -- using network models +# # # ------------------------------------------------------------------------------------- +# dataset_name = os.path.basename(data_dir) +# if cfg["ood_snippets"]: +# print(f'Starting OOD snippet analysis for {dataset_name}') +# pipeline_ood_snippets( +# cfg=cfg, +# cfg_lp=cfg_lp, +# data_dir=data_dir, +# networks_dir=aeks_dir, +# pp_dir=aeks_eks_dir, +# pseudo_labeler=pseudo_labeler +# ) +# # ------------------------------------------------------------------------------------------------ +# # ------------------------------------------------------------------------------------------------ +# # ------------------------------------------------------------------------------------------------ + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument( + '--config', + required=True, + help='absolute path to .yaml configuration file', + type=str, + ) + args = parser.parse_args() + + pipeline(args.config)
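+
+
+# example invocation (path is illustrative):
+#   python pipelines/pipeline_simple.py \
+#       --config /teamspace/studios/this_studio/lp3d-analysis/configs/pipeline.yaml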