From ee3c82553399e65c384b52ed0e1f9599cfd3248a Mon Sep 17 00:00:00 2001
From: Matt Whiteway
Date: Mon, 9 Dec 2024 19:13:56 +0000
Subject: [PATCH] start pipeline construction

---
 README.md                        |   2 +-
 configs/config_mirror-mouse.yaml | 208 +++++++++++++++
 configs/pipeline.yaml            |  50 +++-
 lp3d_analysis/io.py              |  17 ++
 lp3d_analysis/train.py           | 439 +++++++++++++++++++++++++++++++
 pipelines/pipeline_simple.py     | 258 ++++++++++++++++++
 6 files changed, 972 insertions(+), 2 deletions(-)
 create mode 100644 configs/config_mirror-mouse.yaml
 create mode 100644 lp3d_analysis/io.py
 create mode 100644 lp3d_analysis/train.py
 create mode 100644 pipelines/pipeline_simple.py

diff --git a/README.md b/README.md
index c7414f8..0f32c49 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ sudo apt install ffmpeg
 NOTE: do *not* do this if you are setting up this repo in a Lightning Studio.
 
 ```
-conda create --yes --name labeler python=3.8
+conda create --yes --name labeler python=3.10
 conda activate labeler
 ```
 
diff --git a/configs/config_mirror-mouse.yaml b/configs/config_mirror-mouse.yaml
new file mode 100644
index 0000000..e201655
--- /dev/null
+++ b/configs/config_mirror-mouse.yaml
@@ -0,0 +1,208 @@
+data:
+  # dimensions of training images
+  image_orig_dims:
+    height: 406
+    width: 396
+  # resize dimensions to streamline model creation
+  image_resize_dims:
+    height: 256
+    width: 256
+  # ABSOLUTE path to data directory
+  data_dir: /teamspace/studios/this_studio/data/mirror-mouse
+  # ABSOLUTE path to unlabeled videos' directory; the value below is just a toy example
+  # for running the Getting Started scripts
+  video_dir: videos_test
+
+  # location of labels; for the example script, this should be relative to `data_dir`
+  csv_file: CollectedData.csv
+  # factor by which to downsample heatmaps - 2 | 3
+  downsample_factor: 2
+  # total number of keypoints
+  num_keypoints: 14
+  # keypoint names
+  keypoint_names:
+    - paw1LH_top
+    - paw2LF_top
+    - paw3RF_top
+    - paw4RH_top
+    - tailBase_top
+    - tailMid_top
+    - nose_top
+    # - obs_top
+    - paw1LH_bot
+    - paw2LF_bot
+    - paw3RF_bot
+    - paw4RH_bot
+    - tailBase_bot
+    - tailMid_bot
+    - nose_bot
+    # - obsHigh_bot
+    # - obsLow_bot
+
+  # for mirrored setups with all keypoints defined in the same csv file, define matching
+  # columns for different keypoints (assumes x-y-x-y interleaving)
+  # each list corresponds to a single view, so in the example below there are 2 views
+  # keypoint 0 is from view 0 and matches up with keypoint 7 from view 1
+  # columns that correspond to keypoints only labeled in a single view are omitted
+  # TODO: what if a keypoint is labeled in more than 1 but not all views?
+  # this info is only used for the multiview pca loss
+
+  # mirrored_column_matches:
+  #   - [0, 1, 2, 3, 4, 5, 6]
+  #   - [8, 9, 10, 11, 12, 13, 14]
+  # columns_for_singleview_pca: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14]
+
+  mirrored_column_matches:
+    - [0, 1, 2, 3, 4, 5, 6]
+    - [7, 8, 9, 10, 11, 12, 13]
+
+  columns_for_singleview_pca: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
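+
+  # illustration of the active mapping above (comment only): keypoint 0
+  # (paw1LH_top) in view 0 pairs with keypoint 7 (paw1LH_bot) in view 1;
+  # with x-y interleaving, keypoint k occupies csv columns 2k and 2k+1,
+  # so this pair occupies csv columns (0, 1) and (14, 15)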
+
+training:
+  # select from one of several predefined image/video augmentation pipelines
+  # default: resizing only
+  # dlc: imgaug pipeline implemented in the DLC 2.0 package
+  # dlc-top-down: dlc augmentations plus vertical and horizontal flips
+  imgaug: dlc
+  # batch size of labeled data during training
+  train_batch_size: 10  # original value: 8
+  # batch size of labeled data during validation
+  val_batch_size: 48
+  # batch size of labeled data during test
+  test_batch_size: 48
+  # fraction of labeled data used for training
+  train_prob: 0.8
+  # fraction of labeled data used for validation (remaining used for test)
+  val_prob: 0.1
+  # <=1 - fraction of total train frames (determined by `train_prob`) used for training
+  # >1 - number of total train frames used for training
+  train_frames: 100
+  # number of gpus to train a single model
+  num_gpus: 1
+  # number of cpu workers for data loaders
+  num_workers: 4
+  # epochs over which to assess validation metrics for early stopping
+  early_stop_patience: 3
+  # epoch at which backbone network weights begin updating
+  unfreezing_epoch: 0  # original value: 20
+  # min/max training epochs; training may exit before max due to early stopping
+  min_epochs: 100
+  max_epochs: 100
+  # frequency to log training metrics (one step is one batch)
+  log_every_n_steps: 10
+  # frequency to log validation metrics
+  check_val_every_n_epoch: 5
+  # select gpu for training
+  gpu_id: 0
+  # rng seed for labeled batches
+  rng_seed_data_pt: 0
+  # rng seed for weight initialization
+  rng_seed_model_pt: 0
+  # learning rate scheduler
+  # multisteplr | [todo - reducelronplateau]
+  lr_scheduler: multisteplr
+  lr_scheduler_params:
+    multisteplr:
+      milestones: [150, 200, 250]
+      gamma: 0.5
+
+model:
+  # list of unsupervised losses
+  # "pca_singleview" | "pca_multiview" | "temporal" | "unimodal_mse" | "unimodal_kl"
+  losses_to_use: []
+  # backbone network:
+  # resnet18 | resnet34 | resnet50 | resnet101 | resnet152 | resnet50_contrastive
+  # resnet50_animalpose_apose | resnet50_animal_ap10k
+  # resnet50_human_jhmdb | resnet50_human_res_rle | resnet50_human_top_res
+  # efficientnet_b0 | efficientnet_b1 | efficientnet_b2
+  backbone: resnet50_animal_ap10k
+  # prediction mode: regression | heatmap | heatmap_mhcrnn (context)
+  model_type: heatmap
+  # which heatmap loss to use
+  # "mse" | "kl" | "js"
+  heatmap_loss_type: mse
+  # experiment name (used by the logger)
+  model_name: test
+
+dali:
+  general:
+    seed: 123456
+  base:
+    train:
+      sequence_length: 32  # 4  # step = sequence_length by default; done internally
+    predict:
+      # (train_batch_size + base.train.sequence_length) * 2 -> round down to nearest pow of 2
+      sequence_length: 128  # step = sequence_length by default; done internally
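+  # note: the `context` settings below only apply to context models
+  # (model_type: heatmap_mhcrnn); `base` covers all other model types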
+  context:
+    train:  # defaults: sequence_length=5, step=sequence_length
+      batch_size: 16
+    predict:  # defaults: sequence_length=5, step=1
+      # (train_batch_size / 4 + context.train.batch_size) * 2 -> round down to nearest pow of 2
+      sequence_length: 96
+
+losses:
+  # loss = projection onto the discarded eigenvectors
+  pca_multiview:
+    # weight in front of PCA loss
+    log_weight: 5.0
+    # predictions should lie within the low-d subspace spanned by these components
+    components_to_keep: 3
+    # absolute error (in pixels) below which pca loss is zeroed out; if null, an empirical
+    # epsilon is computed using the labeled data
+    epsilon: null
+  # loss = projection onto the discarded eigenvectors
+  pca_singleview:
+    # weight in front of PCA loss
+    log_weight: 5.0
+    # predictions should lie within the low-d subspace spanned by components that describe
+    # this fraction of the variance
+    components_to_keep: 0.99
+    # absolute error (in pixels) below which pca loss is zeroed out; if null, an empirical
+    # epsilon is computed using the labeled data
+    epsilon: null
+  # loss = norm of distance between successive timepoints
+  temporal:
+    # weight in front of temporal loss
+    log_weight: 5.0
+    # for epsilon-insensitive rectification
+    # (in pixels; diffs below this are not penalized)
+    epsilon: 20.0
+    # nan removal value
+    # (in probability; frames whose heatmap max is below this value are removed)
+    prob_threshold: 0.05
+
+eval:
+  # paths to the hydra config files in the output folder, OR absolute paths to such folders
+  hydra_paths: [" "]
+  # run prediction on videos after training?
+  predict_vids_after_training: true
+  # save labeled .mp4s after training?
+  save_vids_after_training: false
+  fiftyone:
+    # name of the dataset (Mongo DB) created by FiftyOne; for video datasets, "_video" is
+    # appended to dataset_name
+    dataset_name: mirror-mouse
+    # manually provide a different model name to be displayed in FiftyOne
+    model_display_names: ["test_model"]
+    # whether to launch the app from the script (true), or from ipython (for finer control
+    # over the outputs)
+    launch_app_from_script: false
+
+    remote: true  # for LAI, must be false
+    address: 127.0.0.1  # ip to launch the app on
+    port: 5151  # port to launch the app on
+
+  # absolute path to a directory containing videos for prediction
+  test_videos_directory: null
+  # absolute path to a directory in which to save .csv files with predictions
+  saved_vid_preds_dir: null
+  # confidence threshold for plotting a vid
+  confidence_thresh_for_vid: 0.90
+  # absolute path to the video file to plot with keypoints
+  video_file_to_plot: null
+  # list of strings, each pointing to a .csv file with predictions from a given model on the
+  # same video; combined with video_file_to_plot to make a visualization
+  pred_csv_files_to_plot: [" "]
+
+callbacks:
+  anneal_weight:
+    attr_name: total_unsupervised_importance
+    init_val: 0.0
+    increase_factor: 0.01
+    final_val: 1.0
+    freeze_until_epoch: 0
\ No newline at end of file
diff --git a/configs/pipeline.yaml b/configs/pipeline.yaml
index 5df65b0..df2e255 100644
--- a/configs/pipeline.yaml
+++ b/configs/pipeline.yaml
@@ -1 +1,49 @@
-train_models: true
\ No newline at end of file
+# absolute path to lp yaml file
+lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_mirror-mouse.yaml
+# lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_crim13.yaml
+
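+# this file and the lightning pose config above are loaded together by
+# lp3d_analysis.io.load_cfgs; settings here control the pipeline stages, while
+# the lightning pose config controls how individual models are trained
+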
+# [needed?] pipeline seed for initial data split
+pipeline_seeds: 0
+
+# initial training of an ensemble of networks
+train_networks:
+  # run this section?
+  run: True
+  # overwrite previous results?
+  overwrite: False
+  # pose estimation data type
+  # data_type: lp
+  # ensemble seeds
+  ensemble_seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+  # number of ground truth labels for training
+  n_hand_labels: 100
+  # model types
+  model_types:
+    - supervised
+    - context
+  # training parameters
+  min_steps: 5000
+  max_steps: 5000
+  milestone_steps: [2000, 3000, 4000]
+  val_check_interval: 50
+  train_check_interval: 10
+
+# post-processing options
+post_processing:
+  eks_singleview:
+    run: False
+    overwrite: False
+  eks_multiview:
+    run: False
+    overwrite: False
+
+# visualization options
+visualization:
+  run: False
+  overwrite: False
+  ens_var_plots: False
diff --git a/lp3d_analysis/io.py b/lp3d_analysis/io.py
new file mode 100644
index 0000000..1868ef6
--- /dev/null
+++ b/lp3d_analysis/io.py
@@ -0,0 +1,17 @@
+from typing import Tuple
+
+import yaml
+from omegaconf import DictConfig
+
+
+def load_cfgs(config_file: str) -> Tuple[DictConfig, DictConfig]:
+    """Load the pipeline config and the lightning pose config it points to."""
+    # Load pipeline config file
+    with open(config_file, "r") as file:
+        cfg_pipe = yaml.safe_load(file)
+    cfg_pipe = DictConfig(cfg_pipe)
+
+    # Load lightning pose config file from the path specified in the pipeline config
+    lightning_pose_config_path = cfg_pipe.get("lightning_pose_config")
+    with open(lightning_pose_config_path, "r") as file:
+        lightning_pose_cfg = yaml.safe_load(file)
+
+    cfg_lp = DictConfig(lightning_pose_cfg)
+    return cfg_pipe, cfg_lp
diff --git a/lp3d_analysis/train.py b/lp3d_analysis/train.py
new file mode 100644
index 0000000..01d464e
--- /dev/null
+++ b/lp3d_analysis/train.py
@@ -0,0 +1,439 @@
+"""Functions for training."""
+
+import gc
+import glob
+import os
+import random
+import shutil
+from typing import List, Optional, Tuple
+
+import lightning.pytorch as pl
+import numpy as np
+import pandas as pd
+import torch
+from lightning_pose.callbacks import AnnealWeight  # needed by get_callbacks below
+from lightning_pose.utils import pretty_print_cfg, pretty_print_str
+from lightning_pose.utils.io import (check_video_paths,
+                                     return_absolute_data_paths,
+                                     return_absolute_path)
+from lightning_pose.utils.predictions import (predict_dataset,
+                                              predict_single_video)
+from lightning_pose.utils.scripts import (  # get_callbacks,
+    calculate_train_batches, compute_metrics, get_data_module, get_dataset,
+    get_imgaug_transform, get_loss_factories, get_model)
+from moviepy.editor import VideoFileClip
+from omegaconf import DictConfig, OmegaConf
+from typeguard import typechecked
+
+
+@typechecked
+def get_callbacks(
+    cfg: DictConfig,
+    early_stopping=True,
+    lr_monitor=True,
+    ckpt_model=True,
+    backbone_unfreeze=True,
+) -> List:
+
+    callbacks = []
+
+    if early_stopping:
+        early_stopping_cb = pl.callbacks.EarlyStopping(
+            monitor="val_supervised_loss",
+            patience=cfg.training.early_stop_patience,
+            mode="min",
+        )
+        callbacks.append(early_stopping_cb)
+
+    if lr_monitor:
+        lr_monitor_cb = pl.callbacks.LearningRateMonitor(logging_interval="epoch")
+        callbacks.append(lr_monitor_cb)
+
+    if ckpt_model:
+        ckpt_callback = pl.callbacks.model_checkpoint.ModelCheckpoint(
+            monitor="val_supervised_loss",
+            mode="min",
+            save_top_k=1,
+            filename="best-checkpoint-step={step}-val_loss={val_supervised_loss:.2f}",
+            save_last=True,
+        )
+        callbacks.append(ckpt_callback)
+
+    if backbone_unfreeze:
+        transfer_unfreeze_callback = pl.callbacks.BackboneFinetuning(
+            unfreeze_backbone_at_epoch=cfg.training.unfreezing_epoch,
+            backbone_initial_ratio_lr=0.1,
+            should_align=True,
+            train_bn=True,
+        )
+        callbacks.append(transfer_unfreeze_callback)
+
+    # we just need this callback for unsupervised models
+    if cfg.model.losses_to_use is not None and len(cfg.model.losses_to_use) > 0:
+        anneal_weight_callback = AnnealWeight(**cfg.callbacks.anneal_weight)
+        callbacks.append(anneal_weight_callback)
+
+    return callbacks
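+
+
+# note on checkpoint naming (illustrative): with the ModelCheckpoint template above,
+# the best checkpoint for a model is saved to a path like
+#   tb_logs/<model_name>/version_0/checkpoints/best-checkpoint-step=5000-val_loss=3.21.ckpt
+# which is what the "best-checkpoint-*.ckpt" glob in train_and_infer() below relies
+# on when deciding whether training can be skipped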
+
+
+def train(
+    cfg: DictConfig,
+    results_dir: str,
+    min_steps: int,
+    max_steps: int,
+    milestone_steps: List[int],
+    val_check_interval: int,
+    n_train_frames: Optional[int] = None,
+) -> Tuple[str, pl.LightningDataModule, pl.Trainer]:
+    """Train a model and return the best checkpoint path, data module, and trainer."""
+
+    MIN_STEPS = min_steps
+    MAX_STEPS = max_steps
+    MILESTONE_STEPS = milestone_steps
+    # override the config value; None means use the full training split
+    cfg.training.train_frames = n_train_frames
+
+    # mimic hydra, change dir into results dir
+    pwd = os.getcwd()
+    os.makedirs(results_dir, exist_ok=True)
+    os.chdir(results_dir)
+
+    # reset all seeds
+    seed = 0
+    os.environ["PYTHONHASHSEED"] = str(seed)
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+    # ----------------------------------------------------------------------------------
+    # set up data/model objects
+    # ----------------------------------------------------------------------------------
+
+    pretty_print_cfg(cfg)
+
+    data_dir, video_dir = return_absolute_data_paths(data_cfg=cfg.data)
+
+    # imgaug transform
+    imgaug_transform = get_imgaug_transform(cfg=cfg)
+
+    # dataset
+    dataset = get_dataset(cfg=cfg, data_dir=data_dir, imgaug_transform=imgaug_transform)
+
+    # datamodule; breaks up dataset into train/val/test
+    data_module = get_data_module(cfg=cfg, dataset=dataset, video_dir=video_dir)
+    data_module.setup()
+
+    num_train_frames = len(data_module.train_dataset)
+    if n_train_frames:
+        num_train_frames = n_train_frames
+
+    # convert step-based budgets into the epoch-based params lightning expects
+    step_per_epoch = num_train_frames / cfg.training.train_batch_size
+    print(f'step_per_epoch={step_per_epoch}')
+
+    cfg.training.max_epochs = int(MAX_STEPS / step_per_epoch)
+    cfg.training.min_epochs = int(MIN_STEPS / step_per_epoch)
+    cfg.training.lr_scheduler_params.multisteplr.milestones = \
+        [int(s / step_per_epoch) for s in MILESTONE_STEPS]
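+
+    # worked example with the configs in this repo: n_train_frames=100 and
+    # train_batch_size=10 give step_per_epoch=10, so min/max_steps=5000 become
+    # min/max_epochs=500 and milestone_steps=[2000, 3000, 4000] become
+    # milestones=[200, 300, 400]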
+
+    # build loss factory which orchestrates different losses
+    loss_factories = get_loss_factories(cfg=cfg, data_module=data_module)
+
+    # model
+    model = get_model(cfg=cfg, data_module=data_module, loss_factories=loss_factories)
+
+    # ----------------------------------------------------------------------------------
+    # set up and run training
+    # ----------------------------------------------------------------------------------
+
+    # logger
+    logger = pl.loggers.TensorBoardLogger("tb_logs", name=cfg.model.model_name)
+
+    # early stopping is disabled; validation is checked on a step interval instead
+    callbacks = get_callbacks(cfg, early_stopping=False)
+
+    # calculate number of batches for both labeled and unlabeled data per epoch
+    limit_train_batches = calculate_train_batches(cfg, dataset)
+
+    # set up trainer
+    trainer = pl.Trainer(
+        accelerator="gpu",
+        devices=1,
+        max_epochs=cfg.training.max_epochs,
+        min_epochs=cfg.training.min_epochs,
+        check_val_every_n_epoch=None,
+        val_check_interval=val_check_interval,
+        log_every_n_steps=cfg.training.log_every_n_steps,
+        callbacks=callbacks,
+        logger=logger,
+        limit_train_batches=limit_train_batches,
+    )
+
+    # train model!
+    trainer.fit(model=model, datamodule=data_module)
+
+    # save config file (results_dir was created above)
+    cfg_file_local = os.path.join(results_dir, "config.yaml")
+    OmegaConf.save(config=cfg, f=cfg_file_local)
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: labeled frames
+    # ----------------------------------------------------------------------------------
+    hydra_output_directory = os.getcwd()
+    print("Hydra output directory: {}".format(hydra_output_directory))
+
+    # get best ckpt
+    best_ckpt = os.path.abspath(trainer.checkpoint_callback.best_model_path)
+
+    # check if best_ckpt is a file
+    if not os.path.isfile(best_ckpt):
+        raise FileNotFoundError("Cannot find checkpoint. Have you trained for too few epochs?")
+
+    # make unaugmented data_loader if necessary
+    if cfg.training.imgaug != "default":
+        cfg_pred = cfg.copy()
+        cfg_pred.training.imgaug = "default"
+        imgaug_transform_pred = get_imgaug_transform(cfg=cfg_pred)
+        dataset_pred = get_dataset(
+            cfg=cfg_pred, data_dir=data_dir, imgaug_transform=imgaug_transform_pred)
+        data_module_pred = get_data_module(
+            cfg=cfg_pred, dataset=dataset_pred, video_dir=video_dir)
+        data_module_pred.setup()
+    else:
+        data_module_pred = data_module
+
+    # predict on all labeled frames (train/val/test)
+    pretty_print_str("Predicting train/val/test images...")
+    # compute and save frame-wise predictions
+    preds_file = os.path.join(hydra_output_directory, "predictions.csv")
+    predict_dataset(
+        cfg=cfg,
+        trainer=trainer,
+        model=model,
+        data_module=data_module_pred,
+        ckpt_file=best_ckpt,
+        preds_file=preds_file,
+    )
+    # compute and save various metrics
+    try:
+        compute_metrics(cfg=cfg, preds_file=preds_file, data_module=data_module_pred)
+    except Exception as e:
+        print(f"Error computing metrics\n{e}")
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: predict on OOD labeled frames
+    # ----------------------------------------------------------------------------------
+    # update config file to point to OOD data
+    csv_file_ood = os.path.join(cfg.data.data_dir, "CollectedData_new.csv")
+    if os.path.exists(csv_file_ood):
+        cfg_ood = cfg.copy()
+        cfg_ood.data.csv_file = csv_file_ood
+        cfg_ood.training.imgaug = "default"
+        cfg_ood.training.train_prob = 1
+        cfg_ood.training.val_prob = 0
+        cfg_ood.training.train_frames = 1
+        # build dataset/datamodule
+        imgaug_transform_ood = get_imgaug_transform(cfg=cfg_ood)
+        dataset_ood = get_dataset(
+            cfg=cfg_ood, data_dir=data_dir, imgaug_transform=imgaug_transform_ood
+        )
+        data_module_ood = get_data_module(cfg=cfg_ood, dataset=dataset_ood, video_dir=video_dir)
+        data_module_ood.setup()
+        pretty_print_str("Predicting OOD images...")
+        # compute and save frame-wise predictions
+        preds_file_ood = os.path.join(hydra_output_directory, "predictions_new.csv")
+        predict_dataset(
+            cfg=cfg_ood,
+            trainer=trainer,
+            model=model,
+            data_module=data_module_ood,
+            ckpt_file=best_ckpt,
+            preds_file=preds_file_ood,
+        )
+        # compute and save various metrics
+        try:
+            compute_metrics(
+                cfg=cfg_ood, preds_file=preds_file_ood, data_module=data_module_ood
+            )
+        except Exception as e:
+            print(f"Error computing metrics\n{e}")
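+
+    # note: the OOD block above is driven entirely by file naming: labels in
+    # <data_dir>/CollectedData_new.csv produce predictions_new.csv alongside
+    # predictions.csv; if that csv is absent the block is skipped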
+
+    # ----------------------------------------------------------------------------------
+    # post-training analysis: unlabeled videos
+    # ----------------------------------------------------------------------------------
+    if cfg.eval.predict_vids_after_training:
+        pretty_print_str("Predicting videos...")
+        if cfg.eval.test_videos_directory is None:
+            filenames = []
+        else:
+            filenames = check_video_paths(return_absolute_path(cfg.eval.test_videos_directory))
+            pretty_print_str(
+                f"Found {len(filenames)} videos to predict on "
+                f"(in cfg.eval.test_videos_directory)"
+            )
+
+        for video_file in filenames:
+            assert os.path.isfile(video_file)
+            pretty_print_str(f"Predicting video: {video_file}...")
+            # get save name for prediction csv file
+            video_pred_dir = os.path.join(hydra_output_directory, "video_preds")
+            os.makedirs(video_pred_dir, exist_ok=True)
+            video_pred_name = os.path.splitext(os.path.basename(video_file))[0]
+            prediction_csv_file = os.path.join(video_pred_dir, video_pred_name + ".csv")
+            inference_with_metrics(
+                video_file=video_file,
+                ckpt_file=best_ckpt,
+                cfg=cfg,
+                preds_file=prediction_csv_file,
+                data_module=data_module_pred,
+                trainer=trainer,
+            )
+
+    # ----------------------------------------------------------------------------------
+    # clean up
+    # ----------------------------------------------------------------------------------
+    # remove lightning logs
+    shutil.rmtree(os.path.join(results_dir, "lightning_logs"), ignore_errors=True)
+
+    # change directory back
+    os.chdir(pwd)
+
+    # clean up memory
+    del imgaug_transform
+    del dataset
+    del data_module
+    # del data_module_pred
+    del loss_factories
+    del model
+    # del trainer
+    gc.collect()
+    torch.cuda.empty_cache()
+
+    return best_ckpt, data_module_pred, trainer
+
+
+def inference_with_metrics(
+    video_file: str,
+    cfg: DictConfig,
+    preds_file: str,
+    ckpt_file: Optional[str] = None,
+    data_module: Optional[pl.LightningDataModule] = None,
+    trainer: Optional[pl.Trainer] = None,
+    metrics: bool = True,
+) -> pd.DataFrame:
+
+    # update video size in config
+    video_clip = VideoFileClip(video_file)
+    cfg.data.image_orig_dims.width = video_clip.w
+    cfg.data.image_orig_dims.height = video_clip.h
+
+    # compute predictions if they don't already exist
+    if not os.path.exists(preds_file):
+        preds_df = predict_single_video(
+            video_file=video_file,
+            ckpt_file=ckpt_file,
+            cfg_file=cfg,
+            preds_file=preds_file,
+            data_module=data_module,
+            trainer=trainer,
+        )
+    else:
+        preds_df = pd.read_csv(preds_file, header=[0, 1, 2], index_col=0)
+
+    # compute and save various metrics
+    if metrics:
+        compute_metrics(cfg=cfg, preds_file=preds_file, data_module=data_module)
+
+    video_clip.close()
+    del video_clip
+    gc.collect()
+
+    return preds_df
+
+
+def train_and_infer(
+    cfg_pipe: DictConfig,
+    cfg_lp: DictConfig,
+    rng_seed: int,
+    data_dir: str,
+    results_dir: str,
+    csv_prefix: Optional[str] = None,
+    new_labels_csv: Optional[str] = None,
+    n_train_frames: Optional[int] = None,
+    overwrite: bool = False,
+) -> None:
+
+    # Parse params from config
+    min_steps = cfg_pipe.train_networks.min_steps
+    max_steps = cfg_pipe.train_networks.max_steps
+    milestone_steps = cfg_pipe.train_networks.milestone_steps
+    val_check_interval = cfg_pipe.train_networks.val_check_interval
+    # not yet defined in configs/pipeline.yaml; default to an empty list
+    video_directories = cfg_pipe.train_networks.get("video_directories", [])
+
+    # Update config
+    cfg_lp.data.data_dir = data_dir
+    cfg_lp.training.rng_seed_data_pt = rng_seed
+    cfg_lp.training.rng_seed_model_pt = rng_seed
+    if new_labels_csv is not None:
+        cfg_lp.data.csv_file = new_labels_csv
+
+    # Add iteration-specific fields to the config
+    # (note: train() recomputes min/max epochs from the step counts above)
+    cfg_lp.training.max_epochs = 10
+    cfg_lp.training.min_epochs = 10
+    cfg_lp.training.unfreeze_step = 30
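+
+    # expected layout of results_dir after a completed run, based on the paths used
+    # below and in train():
+    #   config.yaml
+    #   predictions.csv
+    #   tb_logs/<model_name>/version_*/checkpoints/best-checkpoint-*.ckpt
+    #   video_preds/<video_name>.csv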
+
+    # Check if model has already been trained
+    model_config_checked = os.path.join(results_dir, "config.yaml")
+
+    if os.path.exists(model_config_checked) and not overwrite:
+        print(f"config.yaml found for rng{rng_seed}. Skipping training.")
+
+        checkpoint_pattern = os.path.join(
+            results_dir, "tb_logs", "*", "version_*", "checkpoints", "best-checkpoint-*.ckpt")
+        checkpoint_files = glob.glob(checkpoint_pattern)
+
+        if checkpoint_files:
+            best_ckpt = sorted(checkpoint_files)[-1]  # latest best checkpoint
+        else:
+            raise FileNotFoundError(
+                f"config.yaml exists in {results_dir} but no checkpoint was found"
+            )
+
+        data_module = None
+        trainer = None
+
+    else:
+        print(f"No config.yaml found for rng{rng_seed}. Training the model.")
+        best_ckpt, data_module, trainer = train(
+            cfg=cfg_lp.copy(),
+            results_dir=results_dir,
+            min_steps=min_steps,
+            max_steps=max_steps,
+            milestone_steps=milestone_steps,
+            val_check_interval=val_check_interval,
+            n_train_frames=n_train_frames,
+        )
+
+    # Run inference on all InD/OOD videos and compute unsupervised metrics
+    for video_dir in video_directories:
+        video_files = \
+            [f for f in os.listdir(os.path.join(data_dir, video_dir)) if f.endswith('.mp4')]
+        for video_file in video_files:
+            if csv_prefix:
+                inference_csv_name = f'{csv_prefix}_{video_file.replace(".mp4", ".csv")}'
+            else:
+                inference_csv_name = video_file.replace(".mp4", ".csv")
+            inference_csv = os.path.join(results_dir, "video_preds", inference_csv_name)
+
+            if os.path.exists(inference_csv) and not overwrite:
+                print(f"Inference file {inference_csv} already exists. "
+                      f"Skipping inference for {video_file}")
+            else:
+                print(f"Running inference for {video_file}")
+                inference_with_metrics(
+                    video_file=os.path.join(data_dir, video_dir, video_file),
+                    cfg=cfg_lp.copy(),
+                    preds_file=inference_csv,
+                    ckpt_file=best_ckpt,
+                    data_module=data_module,
+                    trainer=trainer,
+                    metrics=False,
+                )
diff --git a/pipelines/pipeline_simple.py b/pipelines/pipeline_simple.py
new file mode 100644
index 0000000..2961374
--- /dev/null
+++ b/pipelines/pipeline_simple.py
@@ -0,0 +1,258 @@
+import argparse
+import os
+
+from lp3d_analysis.io import load_cfgs
+from lp3d_analysis.train import train_and_infer
+
+
+def pipeline(config_file: str):
+
+    # -------------------------------------------
+    # Setup
+    # -------------------------------------------
+
+    # load cfg_pipe (pipeline yaml) and cfg_lp (lp yaml); both are DictConfigs
+    cfg_pipe, cfg_lp = load_cfgs(config_file)
+
+    # Define directories
+    data_dir = cfg_lp.data.data_dir
+    pipeline_script_dir = os.path.dirname(os.path.abspath(__file__))
+    outputs_dir = os.path.join(pipeline_script_dir, f'../../outputs/{os.path.basename(data_dir)}')
+
+# # Build list of video names from the video directory
+# num_videos, video_names = find_video_names(data_dir, cfg["video_directories"])
+# print(f'Found {num_videos} videos: {video_names}.')
+
+    # -------------------------------------------------------------------------------------
+    # Train ensembles
+    # -------------------------------------------------------------------------------------
+
+    if cfg_pipe.train_networks.run:
+
+        for rng_seed in cfg_pipe.train_networks.ensemble_seeds:
+            # TODO: finalize the results directory structure; provisional layout below
+            results_dir = os.path.join(outputs_dir, 'networks', f'rng{rng_seed}')
+            # Main function call
+            train_and_infer(
+                cfg_pipe=cfg_pipe.copy(),
+                cfg_lp=cfg_lp.copy(),
+                rng_seed=rng_seed,
+                data_dir=data_dir,
+                results_dir=results_dir,
+                overwrite=cfg_pipe.train_networks.overwrite,
+            )
+
+# # # # -------------------------------------------------------------------------------------
+# # # # Post-process network outputs to generate potential pseudo labels (chosen in next step)
+# # # #
------------------------------------------------------------------------------------- +# pp_dir = os.path.join( +# outputs_dir, +# 'post-processors', +# f"{cfg['pseudo_labeler']}_rng={cfg['ensemble_seeds'][0]}-{cfg['ensemble_seeds'][-1]}" +# ) +# pseudo_labeler = cfg["pseudo_labeler"] +# # Collect input eks csv paths from video names; skip existing +# eks_csv_paths = collect_missing_eks_csv_paths(video_names, pp_dir) +# print(f'Post-processing the following videos using {pseudo_labeler}: {eks_csv_paths}') +# # ||| Main EKS function call ||| pipeline_eks will also handle ensemble_mean baseline +# if pseudo_labeler == "eks" or pseudo_labeler == "ensemble_mean": +# pipeline_eks( +# input_csv_names=eks_csv_paths, +# input_dir=networks_dir, +# data_type=cfg["data_type"], +# pseudo_labeler=pseudo_labeler, +# cfg_lp=cfg_lp.copy(), +# results_dir=pp_dir +# ) + +# # # ------------------------------------------------------------------------------------- +# # # run inference on OOD snippets (if specified) -- using network models +# # # ------------------------------------------------------------------------------------- +# dataset_name = os.path.basename(data_dir) +# if cfg["ood_snippets"]: +# print(f'Starting OOD snippet analysis for {dataset_name}') +# pipeline_ood_snippets( +# cfg=cfg, +# cfg_lp=cfg_lp, +# data_dir=data_dir, +# networks_dir=networks_dir, +# pp_dir=pp_dir, +# pseudo_labeler=pseudo_labeler +# ) + +# # # ------------------------------------------------------------------------------------- +# # # select frames to add to the dataset +# # # ------------------------------------------------------------------------------------- +# selection_strategy = cfg["selection_strategy"] +# print( +# f'Selecting {cfg["n_pseudo_labels"]} pseudo-labels from {num_videos} ' +# f'{cfg["pseudo_labeler"]} outputs using ({selection_strategy} strategy)' +# ) +# hand_labels = pd.read_csv(subsample_path, header=[0, 1, 2], index_col=0) +# # Process each ensemble seed +# for k in cfg["ensemble_seeds"]: +# # Initialize seed_labels with hand labels for this seed +# seed_labels = hand_labels.copy() +# combined_csv_filename = ( +# f"CollectedData_hand={cfg['n_hand_labels']}" +# f"_pseudo={cfg['n_pseudo_labels']}_k={k}_{selection_strategy}.csv" +# ) +# combined_csv_path = os.path.join(hand_pseudo_combined, combined_csv_filename) + +# # Check if frame selection has already been done +# if os.path.exists(combined_csv_path): +# print( +# f'Selected frames already exist at {combined_csv_path}. ' +# f'Skipping frame selection for rng{k}.' 
+# ) +# seed_labels = pd.read_csv(combined_csv_path, header=[0, 1, 2], index_col=0) + +# else: +# print(f'Selecting pseudo-labels using a {selection_strategy} strategy.') + +# if selection_strategy == 'random': +# seed_labels = select_frames_random( +# cfg=cfg.copy(), +# k=k, +# data_dir=data_dir, +# num_videos=num_videos, +# pp_dir=pp_dir, +# labeled_data_dir=labeled_data_dir, +# seed_labels=seed_labels +# ) + +# elif selection_strategy == 'hand': +# seed_labels = select_frames_hand( +# unsampled_path=unsampled_path, +# n_frames_to_select=cfg['n_pseudo_labels'], +# k=k, +# seed_labels=seed_labels +# ) + +# elif selection_strategy == 'frame_selection': +# frame_selection_path = os.path.join(source_dir, ( +# f"outputs/{os.path.basename(data_dir)}/hand={cfg_lp.training.train_frames}_" +# f"pseudo={cfg['n_pseudo_labels']}/post-quality-frame/" +# )) + +# final_selected_frames_path = select_frames_strategy_pipeline( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# data_dir=data_dir, +# source_dir=source_dir, +# frame_selection_path=frame_selection_path, +# ) + +# seed_labels = process_and_export_frame_selection( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# data_dir=data_dir, +# labeled_data_dir=labeled_data_dir, +# final_selected_frames_path=final_selected_frames_path, +# seed_labels=seed_labels +# ) + +# # Filter out columns where the second level of the header contains 'zscore', 'nll', or 'ensemble_std' +# columns_to_keep = [ +# col for col in seed_labels.columns +# if not any(substring in col[2] for substring in ['zscore', 'nll', 'ensemble_std']) +# ] + +# # Select the columns to keep +# seed_labels_filtered = seed_labels[columns_to_keep] + +# # Export the filtered DataFrame to CSV +# seed_labels_filtered.to_csv(combined_csv_path) +# print( +# f"Saved combined hand labels and pseudo labels for seed {k} to " +# f"{combined_csv_path}" +# ) + +# # Check number of labels for this seed +# expected_total_labels = cfg['n_hand_labels'] + cfg["n_pseudo_labels"] +# if seed_labels.shape[0] != expected_total_labels: +# print( +# f"Warning: Number of labels for seed {k} ({seed_labels.shape[0]}) " +# f"does not match expected count ({expected_total_labels})" +# ) +# else: +# print(f"Label count verified for seed {k}: {seed_labels.shape[0]} labels") + +# # # ------------------------------------------------------------------------------------- +# # # Train models on expanded dataset +# # # ------------------------------------------------------------------------------------- + +# csv_prefix = ( +# f"hand={cfg['n_hand_labels']}_rng={k}_" +# f"pseudo={cfg['n_pseudo_labels']}_" +# f"{cfg['pseudo_labeler']}_{cfg['selection_strategy']}_" +# f"rng={cfg['ensemble_seeds'][0]}-{cfg['ensemble_seeds'][-1]}" +# ) +# results_dir = os.path.join(aeks_dir, f"rng{k}") + +# # Run train_and_infer with the combined hand labels and pseudo labels +# train_and_infer( +# cfg=cfg.copy(), +# cfg_lp=cfg_lp.copy(), +# k=k, +# data_dir=data_dir, +# results_dir=results_dir, +# csv_prefix=csv_prefix, +# new_labels_csv=combined_csv_path, # Use the combined CSV file for this seed +# n_train_frames=expected_total_labels +# ) + +# print( +# f"Completed training and inference for seed {k} " +# f"using combined hand labels and pseudo labels" +# ) + +# print("Completed training and inference for all seeds using expanded datasets") + +# # # # ------------------------------------------------------------------------------------- +# # # # Run EKS on expanded dataset inferences +# # # # 
------------------------------------------------------------------------------------- +# pseudo_labeler = 'eks' +# # Collect input csv names from video names; skip existing ones +# eks_csv_paths = collect_missing_eks_csv_paths(video_names, aeks_eks_dir) +# print(f'Post-processing the following videos using {pseudo_labeler}: {eks_csv_paths}') +# # ||| Main second round EKS function call ||| +# pipeline_eks( +# input_csv_names=eks_csv_paths, +# input_dir=aeks_dir, +# data_type=cfg["data_type"], +# pseudo_labeler=pseudo_labeler, +# cfg_lp=cfg_lp.copy(), +# results_dir=results_dir +# ) + +# # # ------------------------------------------------------------------------------------- +# # # run inference on OOD snippets (if specified) -- using network models +# # # ------------------------------------------------------------------------------------- +# dataset_name = os.path.basename(data_dir) +# if cfg["ood_snippets"]: +# print(f'Starting OOD snippet analysis for {dataset_name}') +# pipeline_ood_snippets( +# cfg=cfg, +# cfg_lp=cfg_lp, +# data_dir=data_dir, +# networks_dir=aeks_dir, +# pp_dir=aeks_eks_dir, +# pseudo_labeler=pseudo_labeler +# ) +# # ------------------------------------------------------------------------------------------------ +# # ------------------------------------------------------------------------------------------------ +# # ------------------------------------------------------------------------------------------------ + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument( + '--config', + required=True, + help='absolute path to .yaml configuration file', + type=str, + ) + args = parser.parse_args() + + pipeline(args.config)
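+
+
+# example invocation (path is illustrative):
+#   python pipelines/pipeline_simple.py \
+#       --config /teamspace/studios/this_studio/lp3d-analysis/configs/pipeline.yaml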