Skip to content

Commit

Permalink
pose_process function for the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
LennyAharon committed Dec 16, 2024
1 parent 0810ed5 commit 578bd8e
Show file tree
Hide file tree
Showing 4 changed files with 344 additions and 15 deletions.
22 changes: 17 additions & 5 deletions configs/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#TODO
# ask Matt about adding the views variable to the config file so we can pick the views that we have and in that way instead of iterating over files we will iterate over views - which will determine things



# absolute path to lp yaml file
lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_mirror-mouse-separate.yaml
# lightning_pose_config: /teamspace/studios/this_studio/lp3d-analysis/configs/config_crim13.yaml

# [needed?] pipeline seed for initial data split
pipeline_seeds: 0

intermediate_results_dir: testing
intermediate_results_dir: results_500

# initial training of an ensemble of networks
train_networks:
Expand All @@ -18,8 +23,8 @@ train_networks:
# ensemble seeds
ensemble_seeds:
- 0
# - 1
# - 2
- 1
- 2
# - 3
# - 4
# number of ground truth labels for training
Expand All @@ -40,8 +45,8 @@ train_networks:
# milestone_steps: [2000, 3000, 4000]
# val_check_interval: 50
# train_check_interval: 10
min_steps: 50
max_steps: 50
min_steps: 500
max_steps: 500
milestone_steps: [2000, 3000, 4000]
val_check_interval: 5
train_check_interval: 5
Expand All @@ -54,6 +59,13 @@ post_processing:
eks_multiview:
run: False
overwrite: False
ensemble_mean:
run: True
overwrite: False
ensemble_median:
run: True
overwrite: False


# visualization options
visualization:
Expand Down
196 changes: 196 additions & 0 deletions lp3d_analysis/post_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import os
import pandas as pd
import numpy as np

from omegaconf import DictConfig
from typing import List, Literal
from pathlib import Path

from lightning_pose.utils.scripts import (
compute_metrics,
)

#TODO
#1. in the variables for the function pose_process_ensemble, try to change the issue with the mode variable so we can add the eks mode and things like that
#2. Change the way we are using the cfp_lp.data.csv_file - it will make more sense in the function
#3 change variables names so it will make more sense



def post_process_ensemble(
    cfg_lp: DictConfig,
    results_dir: str,
    model_type: str,
    n_labels: int,
    seed_range: tuple[int, int],
    views: list[str],
    mode: Literal['ensemble_mean', 'ensemble_median'],
    overwrite: bool,
) -> None:
    """Aggregate per-seed predictions into an ensemble mean or median, per view.

    For each view, loads ``predictions_<view>_new.csv`` from every seed
    directory ``<model_type>_<n_labels>_<seed>``, stacks the (x, y, likelihood)
    columns, reduces them with nanmean/nanmedian across seeds, writes the
    result under ``<model_type>_<n_labels>_<s0>-<s1>/<mode>/``, and runs
    compute_metrics on each saved file.

    Args:
        cfg_lp: lightning-pose config; ``data.csv_file`` and
            ``data.view_names`` are mutated before computing metrics.
        results_dir: results directory of one model run; its parent is used
            as the base directory containing all seed directories.
        model_type: model identifier used in the seed directory names.
        n_labels: number of ground-truth labels used in training.
        seed_range: inclusive (first, last) ensemble seeds.
        views: camera view names to process.
        mode: which reduction to apply across the ensemble.
        overwrite: if False, skip views whose output file already exists.
    """
    base_dir = os.path.dirname(results_dir)
    ensemble_dir = os.path.join(
        base_dir,
        f"{model_type}_{n_labels}_{seed_range[0]}-{seed_range[1]}"
    )
    output_dir = os.path.join(ensemble_dir, mode)
    os.makedirs(output_dir, exist_ok=True)

    seed_dirs = [
        os.path.join(base_dir, f"{model_type}_{n_labels}_{seed}")
        for seed in range(seed_range[0], seed_range[1] + 1)
    ]

    new_predictions_files = []

    for view in views:
        preds_file = os.path.join(output_dir, f'predictions_{view}_new.csv')
        # Honor the overwrite flag (previously accepted but never used).
        if os.path.exists(preds_file) and not overwrite:
            print(f'Ensemble predictions file {preds_file} already exists. Skipping.')
            continue

        stacked_arrays = []
        column_structure = None
        row_index = None  # index of the first successfully-read seed file

        for seed_dir in seed_dirs:
            pred_file = os.path.join(
                seed_dir,
                'videos-for-each-labeled-frame',
                f'predictions_{view}_new.csv'
            )
            if not os.path.exists(pred_file):
                print(f"Warning: Could not find predictions file: {pred_file}")
                continue

            df = pd.read_csv(pred_file, header=[0, 1, 2], index_col=0)
            if column_structure is None:
                # Keep only the numeric keypoint columns (x, y, likelihood);
                # all seeds are assumed to share this column layout.
                column_structure = df.loc[
                    :, df.columns.get_level_values(2).isin(['x', 'y', 'likelihood'])
                ].columns
                row_index = df.index

            stacked_arrays.append(df.loc[:, column_structure].to_numpy())

        if not stacked_arrays or column_structure is None:
            print(f"Could not find predictions for view: {view}")
            continue

        # Shape: (n_frames, n_numeric_cols, n_seeds)
        stacked_arrays = np.stack(stacked_arrays, axis=-1)

        if mode == 'ensemble_mean':
            aggregated_array = np.nanmean(stacked_arrays, axis=-1)
        elif mode == 'ensemble_median':
            aggregated_array = np.nanmedian(stacked_arrays, axis=-1)
        else:
            print(f"Invalid mode: {mode}")
            continue

        result_df = pd.DataFrame(
            data=aggregated_array,
            index=row_index,
            columns=column_structure
        )
        # "set" column marks these rows as labeled-data (train) predictions
        # so downstream metric code interprets the file correctly.
        result_df.loc[:, ("set", "", "")] = "train"

        result_df.to_csv(preds_file)
        new_predictions_files.append(preds_file)
        print(f"Saved ensemble {mode} predictions for {view} view to {preds_file}")

        # TODO(review): hard-coded csv filenames; presumably should be derived
        # from `views` (CollectedData_<view>_new.csv) — confirm and generalize.
        cfg_lp.data.csv_file = ['CollectedData_top_new.csv', 'CollectedData_bot_new.csv']
        cfg_lp.data.view_names = [view]

        try:
            compute_metrics(cfg=cfg_lp, preds_file=preds_file, data_module=None)
            # Fixed: previously logged `pred_file` (last seed's input),
            # not the ensemble output actually passed to compute_metrics.
            print(f"Successfully computed metrics for {preds_file}")
        except Exception as e:
            print(f"Error computing metrics\n{e}")












# cfg_lp.data.csv_file = predictions_new_ensembles
# if predictions_new_ensembles and not overwrite:
# for preds_file in predictions_new_ensembles:
# try:
# compute_metrics(cfg=cfg_lp, preds_file=preds_file, data_module=None)
# print(f"Succesfully computed metrics for {preds_file}")
# except Exception as e:
# print(f"Error computing metrics for {preds_file}\n{e}")
# else:
# print("No new predictions to compute metrics on")




# cfg_lp.data.csv_file = preds_file
# print(cfg_lp.data.csv_file)
# cfg_lp.data.view_names = [view]

# try:
# compute_metrics(cfg=cfg_lp, preds_file= preds_file , data_module=None)
# print(f"Succesfully computed metrics for {preds_file}")
# except Exception as e:
# print(f"Error computing metrics\n{e}")






# all_predictions.append(df)
# else:
# print(f"Warning: Could not find predictions file: {pred_file}")

# if not all_predictions:
# print(f"Could not find predictions for view: {view}")
# continue

# combined_df = pd.concat(all_predictions, axis =0)
# print(combined_df.head())



# # group by any identifying columns ( assuming frame/ timestamps columns exists)
# # modify these groupby columns based on csv structure
# group_cols = [col for col in combined_df]






# pp_dir = os.path.join(
# # outputs_dir,
# # 'post-processors',
# # f"{cfg['pseudo_labeler']}_rng={cfg['ensemble_seeds'][0]}-{cfg['ensemble_seeds'][-1]}"
# # )




# Here we will start looping over the post-processes
# want to check if we want to run the particular post-process and have a couple of if statements
# combine predictions from multiple models in the ensemble; if we want ensemble_mean run this function and if we want eks run this
# make a new py file called pose processing and basically want to load predictions from different models, want to take the mean / median of all the x and y and also of likelihood - that will be the ensemble mean and median
# that will all be saved as a data frame in csv file inside the supervised_100_0-1 directory and make another directory for each post processor - ensemble_mean, ensemble_median
# once have that data frame I can run compute metrics from the new set of predictions and it will do the pixel_error







# after looping through all the seeds we want to run through the post-processes
# for this I need to implement ensemble mean and median
# take the predictions files in the videos-for-each-labeled-frame and load the csv files from each seed and each view
# I want the prediction files from supervised-100-0 and supervised 100-1
#. I will have to make a new directory supervised_100_0 and supervised_100_1 and the directory for the ensemble will be supervised_100_0-1 (if had more it is 0-5 for example)
83 changes: 81 additions & 2 deletions lp3d_analysis/utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,93 @@
import os
import numpy as np
import pandas as pd

from lightning_pose.utils.scripts import (
compute_metrics,
)

from omegaconf import DictConfig


#TODO
# 1. need to make a decision about the naming of the output file and how to access the correct model name because it is not part of the results_dir
# 2. change names of variables so will look better in the code and make more sense
# 3. should I use cfg_lp_copy?

def extract_ood_frame_predictions(
    cfg_lp: DictConfig,
    data_dir: str,
    results_dir: str,
    overwrite: bool,
    video_dir: str,
) -> None:
    """Collect per-frame OOD predictions from video-snippet csv files.

    OOD labels live in ``*_new.csv`` files in ``data_dir``. For each labeled
    image in such a file, the corresponding snippet csv (same relative path,
    ``.csv`` extension) under ``results_dir/video_dir`` is loaded, its center
    frame extracted, and all center frames are saved as
    ``predictions_<name>.csv``; metrics are then computed on that file.

    Args:
        cfg_lp: lightning-pose config; ``data.csv_file`` is mutated before
            computing metrics.
        data_dir: directory containing the ``*_new.csv`` OOD label files.
        results_dir: model results directory.
        overwrite: if False, skip files whose predictions already exist.
        video_dir: subdirectory of results_dir holding the snippet csvs
            (e.g. 'videos-for-each-labeled-frame').
    """
    new_csv_files = [f for f in os.listdir(data_dir) if f.endswith('_new.csv')]
    print(f"the new csv files are {new_csv_files}")

    for csv_file in new_csv_files:
        # e.g. 'CollectedData_top_new.csv' -> 'predictions_top_new.csv'
        prediction_name = '_'.join(csv_file.split('_')[1:])
        preds_file = os.path.join(results_dir, video_dir, f'predictions_{prediction_name}')

        if os.path.exists(preds_file) and not overwrite:
            print(f'Predictions file {preds_file} already exists. Skipping.')
            continue

        results_list = []
        file_path = os.path.join(data_dir, csv_file)
        df = pd.read_csv(file_path, header=[0, 1, 2], index_col=0)

        for img_path in df.index:
            # 'labeled-data/<vid>/img<#>.png' -> '<vid>/img<#>.csv'
            relative_img_path = '/'.join(img_path.split('/')[1:])
            snippet_path = relative_img_path.replace('png', 'mp4')

            snippet_file = os.path.join(
                results_dir, video_dir, snippet_path.replace('mp4', 'csv'))
            if not os.path.exists(snippet_file):
                continue
            snippet_df = pd.read_csv(snippet_file, header=[0, 1, 2], index_col=0)

            # Center frame only makes sense for an odd frame count.
            # Fixed: original wrote `shape[0] & 2 != 0`, which parses as
            # `shape[0] & (2 != 0)` and was only accidentally an odd-check.
            assert snippet_df.shape[0] % 2 != 0  # ensure odd number of frames
            idx_frame = snippet_df.shape[0] // 2

            # Re-index the center frame by its original image path.
            result = snippet_df[snippet_df.index == idx_frame].rename(
                index={idx_frame: img_path})
            results_list.append(result)

        if results_list:
            results_df = pd.concat(results_list)
            results_df.sort_index(inplace=True)

            # "set" column marks this df as labeled-data (train) predictions.
            results_df.loc[:, ("set", "", "")] = "train"

            results_df.to_csv(preds_file)
            print(f'Saved predictions to {preds_file}')

            cfg_lp.data.csv_file = new_csv_files
            try:
                compute_metrics(cfg=cfg_lp, preds_file=preds_file, data_module=None)
                print(f"Successfully computed metrics for {preds_file}")
            except Exception as e:
                print(f"Error computing metrics\n{e}")


# look for all files that end in _new.csv -> these are OOD labels
# loop through these
#load the csv file and iterate through the rows/index
# loop through these
# for each, load the csv file, and iterate through the rows/index
# 'labeled-data/<vid_name>/img<#>.png'
# 'labeled-data/<vid_name>/img<#>.png'
# s = 'labeled-data/vid_name/img0000.png'
# s2 = '/'.join(s.split('/')[1:])
# s3 = s2.replace('png', 'mp4')
Expand All @@ -20,3 +96,6 @@ def extract_ood_frame_predictions(
# put in dataframe
# save out predictions_<cam_name>.csv
# compute pixel


# Next step - do one of the post processes and the ensemble mean
Loading

0 comments on commit 578bd8e

Please sign in to comment.