diff --git a/.gitignore b/.gitignore index 79f2e20..a961d04 100644 --- a/.gitignore +++ b/.gitignore @@ -73,4 +73,8 @@ target/ # *.png # Old generator -GESGenerator \ No newline at end of file +GESGenerator +.DS_Store +tmp +records +datasets diff --git a/02_labeling.ipynb b/02_labeling.ipynb index ba77b84..6d9875e 100644 --- a/02_labeling.ipynb +++ b/02_labeling.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -52,21 +52,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Label export of scenarios\\SAEZ_test_17\\SAEZ_test_17.yaml started\n", - "Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_46.jpeg\n", - "Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_47.jpeg\n", - "Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_48.jpeg\n", - "Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_49.jpeg\n" + "Label export of scenarios/KFLL_10R-28L_500.yaml started\n", + "Label export of scenarios/SKBO_31L-13R_500.yaml started\n", + "Label export of scenarios/SKBO_31R-13L_500.yaml started\n", + "Label export of scenarios/RPLL_06-24_500.yaml started\n", + "Label export of scenarios/OERK_33R-15L_500.yaml started\n", + "Label export of scenarios/OERK_33L-15R_500.yaml started\n", + "Label export of scenarios/ZGGG_02L-20R_500.yaml started\n", + "Label export of scenarios/ZGGG_02R-20L_500.yaml started\n", + "Label export of scenarios/MMMX_05R-23L_500.yaml started\n", + "Label export of scenarios/VOBL_09R-27L_500.yaml started\n", + "Label export of scenarios/VOBL_09L-27R_500.yaml started\n", + "Label export of scenarios/VVTS_07L-25R_500.yaml started\n", + "Label export of scenarios/VVTS_07R-25L_500.yaml started\n", + "Label export of scenarios/ZGGG_01-19_500.yaml started\n", + "Label export of scenarios/RPLL_13-31_500.yaml started\n" ] } ], "source": [ - "config = ExportConfig()\n", + "config = 
ExportConfig(strict=False)\n", "config.dataset_name = \"train_dataset\"\n", - "config.output_directory = \"data\"\n", - "config.add_dataset_for_export(dataset_name=\"SAEZ_test\", # The name of the input dataset\n", + "os.makedirs(\"datasets\", exist_ok=True)\n", + "config.output_directory = \"datasets\"\n", + "config.add_dataset_for_export(dataset_name=\"train\", # The name of the input dataset\n", " dataset_folder = \"scenarios\", # The folder where the input dataset should be \n", - " dataset_type = DatasetTypes.EARTH_STUDIO) # Type of dataset, either REAL or EARTH_STUDIO for now.\n", + " records_dirpath = \"records\", # Where generated samples are stored\n", + " dataset_type = DatasetTypes.EARTH_STUDIO) # Type of dataset, either REAL or EARTH_STUDIO for now.\n", "# Once the configuration is exported, export can be started\n", "export_datasets(config)" ] @@ -94,6 +106,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/src/labeling/earth_studio_export.py b/src/labeling/earth_studio_export.py index add7460..a509f5c 100644 --- a/src/labeling/earth_studio_export.py +++ b/src/labeling/earth_studio_export.py @@ -56,7 +56,7 @@ def convert_label(image_shape, image_path, c, frame, runways_database, img_nb): return label -def export_labels(yaml_config, google_export_dir=None, out_labels_file=None, out_images_dir=None): +def export_labels(yaml_config, strict, google_export_dir=None, out_labels_file=None, out_images_dir=None): # Parse configuration file # All the dependencies print(f"Label export of {yaml_config} started") @@ -69,7 +69,7 @@ def export_labels(yaml_config, google_export_dir=None, out_labels_file=None, out out_images_dir = google_export_dir / "exported_images" with open(yaml_config, 'r') as f: - c: dict = yaml.safe_load(f) + c: dict = yaml.full_load(f) if 'poses' not in c.keys(): raise RuntimeError('The configuration file is not complete, missing poses.') @@ -101,11 +101,12 @@ def export_labels(yaml_config, 
google_export_dir=None, out_labels_file=None, out # print(f"Skipping missing image : {image_path}") # continue output_image_path = out_images_dir / image_path.name - label = convert_label(image_shape, output_image_path, c, frame_position, runways_database, i) - if not is_runway_image_valid(image_shape, label): - print(f"Skipping invalid image : {image_path}") - continue - labels.add_label(label) + for label in convert_label(image_shape, output_image_path, c, frame_position, runways_database, i): + ok, msg = is_runway_image_valid(image_shape, label, strict, debug=False) + if not ok: + # print(f"Skipping invalid runway for image {image_path} {label['runway']}: {msg}") + continue + labels.add_label(label) shutil.copy(image_path, output_image_path) # Generate label file diff --git a/src/labeling/export_config.py b/src/labeling/export_config.py index 5fcaae0..0a528fc 100644 --- a/src/labeling/export_config.py +++ b/src/labeling/export_config.py @@ -1,5 +1,8 @@ -from yaml import safe_load from enum import Enum +from pathlib import Path +from typing import Optional + +from yaml import safe_load # Ensure user do not misspell the types, or add not supported yet types @@ -16,14 +19,17 @@ class ExportConfig: """ Class to load and store configuration for Lard dataset creation, merge and export. """ - def __init__(self, yaml_file: str = None): + def __init__(self, yaml_file: str = None, strict: bool = True): """ :param yaml_file: path to yml file. If None, an empty config is generated. :type yaml_file: str + :param strict: whether to force all runway corners to be part of the image. 
+        :type strict: bool """ + self.strict = strict if yaml_file is None: self.output_directory = None self.dataset_name = None self.included_datasets = dict() else: with open(yaml_file, 'r') as f: @@ -31,9 +37,12 @@ def __init__(self, yaml_file: str = None): for value in self.included_datasets.values(): value["type"] = DatasetTypes(value["type"]) -    def add_dataset_for_export(self, dataset_name: str, dataset_folder: str, dataset_type: str) -> None: """ Add an acquisition dataset to the ones being exported and merged. """ dataset_type = DatasetTypes(dataset_type) - self.included_datasets[dataset_name] = {"path": dataset_folder, "type": dataset_type} + def add_dataset_for_export(self, dataset_name: str, dataset_folder: str, dataset_type: str, records_dirpath: Optional[Path] = None) -> None: """ Add an acquisition dataset to the ones being exported and merged. """ dataset_type = DatasetTypes(dataset_type) + params = {"path": dataset_folder, "type": dataset_type} + if records_dirpath is not None: + params["records_dirpath"] = Path(records_dirpath) + self.included_datasets[dataset_name] = params diff --git a/src/labeling/generate_dataset.py b/src/labeling/generate_dataset.py index b09b0f0..07685d9 100644 --- a/src/labeling/generate_dataset.py +++ b/src/labeling/generate_dataset.py @@ -1,9 +1,11 @@ +import functools +from multiprocessing import Pool import argparse import os import glob import shutil from pathlib import Path -from typing import Union +from typing import Dict, Optional, Union from src.labeling.earth_studio_export import export_labels from src.labeling.json_export import from_json from src.labeling.labels import Labels @@ -17,6 +19,7 @@ def export_real_directory(folder_path: Union[str, Path], test_images_dir: Union[ """ Parse and return the metadata for a real dataset and associated files. 
""" + # FIXME: strict not used labels = Labels() img_list = [] for ext in IMG_TYPES: @@ -31,25 +34,48 @@ def export_real_directory(folder_path: Union[str, Path], test_images_dir: Union[ return labels -def export_synthesized_directory(folder_path: Union[str, Path], test_images_dir: Union[str, Path]) -> dict: +def export_synthesized_directory_worker(strict, test_images_dir, records_dirpath, acquisition_path: Path) -> Optional[Labels]: + scenario = acquisition_path.stem + if os.path.isdir(acquisition_path): + try: + folder_labels = export_labels(acquisition_path / f"{scenario}.yaml", strict, out_images_dir=test_images_dir) + except KeyError as e: + print(f"Missing data for scenario {scenario} ({e} was not found): scenario skipped ") + return + except FileNotFoundError as e: + print(f"File {e.filename} could not be found for scenario {scenario} : scenario skipped ") + return + folder_labels.add_metadata("scenario", scenario) + return folder_labels + elif acquisition_path.suffix == ".yaml": + google_export_dir=records_dirpath / acquisition_path.stem + if not google_export_dir.exists(): + return + try: + + folder_labels = export_labels(acquisition_path, strict, out_images_dir=test_images_dir, google_export_dir=records_dirpath / scenario) + except KeyError as e: + print(f"Missing data for scenario {scenario} ({e} was not found): scenario skipped ") + return + except FileNotFoundError as e: + print(f"File {e.filename} could not be found for scenario {scenario} : scenario skipped ") + return + folder_labels.add_metadata("scenario", scenario) + return folder_labels + + +def export_synthesized_directory(folder_path: Union[str, Path], strict: bool, test_images_dir: Union[str, Path], records_dirpath: Optional[Path] = None) -> dict: """ Parse and return the metadata for a synthetized google earth dataset. 
""" labels = Labels() folder_path = Path(folder_path) - for scenario in os.listdir(folder_path): - acquisition_path = folder_path / scenario - if os.path.isdir(acquisition_path): - try: - folder_labels = export_labels(acquisition_path / f"{scenario}.yaml", out_images_dir=test_images_dir) - except KeyError as e: - print(f"Missing data for scenario {scenario} ({e} was not found): scenario skipped ") - continue - except FileNotFoundError as e: - print(f"File {e.filename} could not be found for scenario {scenario} : scenario skipped ") - continue - folder_labels.add_metadata("scenario", scenario) - labels += folder_labels + # Enable multiprocessing + with Pool(os.cpu_count()) as p: + for folder_labels in p.imap_unordered(functools.partial(export_synthesized_directory_worker, strict, test_images_dir, records_dirpath), folder_path.glob("*")): + if folder_labels: + labels += folder_labels + return labels @@ -73,7 +99,7 @@ def export_datasets(export_config: ExportConfig) -> None: dataset_type = dataset_infos["type"] dataset_path = dataset_infos["path"] if dataset_type == DatasetTypes.EARTH_STUDIO: - dataset_labels = export_synthesized_directory(dataset_path, test_images_dir) + dataset_labels = export_synthesized_directory(dataset_path, export_config.strict, test_images_dir, records_dirpath=dataset_infos["records_dirpath"] if "records_dirpath" in dataset_infos else None) elif dataset_type == DatasetTypes.REAL: dataset_labels = export_real_directory(dataset_path, test_images_dir) else: diff --git a/src/labeling/labels_utils.py b/src/labeling/labels_utils.py index 3417daa..f64bddb 100644 --- a/src/labeling/labels_utils.py +++ b/src/labeling/labels_utils.py @@ -1,6 +1,9 @@ +from typing import Tuple + import numpy as np import pandas as pd from shapely.geometry import Polygon + from src.labeling.export_config import CORNERS_NAMES @@ -40,7 +43,7 @@ def crop_bbox(bbox: np.array, width: int, height: int) -> np.array: return np.array([x_min, y_min, x_max, y_max]) -def 
is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: bool = False) -> bool: +def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, strict: bool, debug: bool = False) -> Tuple[bool, str]: """ Check if a dataset image is valid : @@ -51,6 +54,8 @@ def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: b :type image_shape: tuple[int] :param label: dataframe with labels :type label: pd.DataFrame + :param strict: whether to force runways be entirely within the image. + :type strict: bool :param debug: print debug information of why an image was deemed invalid. :type debug: bool :return: if the image is valid or not @@ -61,16 +66,19 @@ def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: b image_poly = Polygon([(0, 0), (0, image_shape[0]), image_shape[:2], (image_shape[1], 0)]) if not runway_poly.intersects(image_poly): + msg = "Runway fully out of the image" if debug: - print("Invalid image : runway fully out of the image") + print(msg) # runway and image do not intersects and are not contained in one another, we continue to next runway - return False + return False, msg if runway_poly.contains(image_poly): + msg = "Runway fully contains the image" if debug: - print("Invalid image : runway fully contains the image") - return False - if not image_poly.contains(runway_poly): + print(msg) + return False, msg + if strict and not image_poly.contains(runway_poly): + msg = "At least one runway corner is outside the image " if debug: - print("Invalid image : at least one runway corner is outside the image ") - return False - return True + print(msg) + return False, msg + return True, "Success" diff --git a/src/scenario/scenario_config.py b/src/scenario/scenario_config.py index cd0303b..d005e79 100644 --- a/src/scenario/scenario_config.py +++ b/src/scenario/scenario_config.py @@ -10,7 +10,7 @@ def __init__(self, airport=None, runway=None, scenario_dir=None, yaml_file=None) If a yaml_file is 
provided, the parameters airport, runway, scenario dir are not used, and the ones from the yaml will be used instead. """ - self.content = ScenarioContent(airport=airport, runways=runway) + self.content = ScenarioContent(airport=airport, runways=runway if runway is None or isinstance(runway, list) else [runway]) self.outputs = DefaultOutputs() if scenario_dir is not None: self.scenario_dir = Path(scenario_dir)