Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update labelling #7

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,8 @@ target/
# *.png

# Old generator
GESGenerator
GESGenerator
.DS_Store
tmp
records
datasets
33 changes: 23 additions & 10 deletions 02_labeling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -52,21 +52,33 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Label export of scenarios\\SAEZ_test_17\\SAEZ_test_17.yaml started\n",
"Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_46.jpeg\n",
"Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_47.jpeg\n",
"Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_48.jpeg\n",
"Skipping invalid image : scenarios\\SAEZ_test_17\\footage\\SAEZ_test_17_49.jpeg\n"
"Label export of scenarios/KFLL_10R-28L_500.yaml started\n",
"Label export of scenarios/SKBO_31L-13R_500.yaml started\n",
"Label export of scenarios/SKBO_31R-13L_500.yaml started\n",
"Label export of scenarios/RPLL_06-24_500.yaml started\n",
"Label export of scenarios/OERK_33R-15L_500.yaml started\n",
"Label export of scenarios/OERK_33L-15R_500.yaml started\n",
"Label export of scenarios/ZGGG_02L-20R_500.yaml started\n",
"Label export of scenarios/ZGGG_02R-20L_500.yaml started\n",
"Label export of scenarios/MMMX_05R-23L_500.yaml started\n",
"Label export of scenarios/VOBL_09R-27L_500.yaml started\n",
"Label export of scenarios/VOBL_09L-27R_500.yaml started\n",
"Label export of scenarios/VVTS_07L-25R_500.yaml started\n",
"Label export of scenarios/VVTS_07R-25L_500.yaml started\n",
"Label export of scenarios/ZGGG_01-19_500.yaml started\n",
"Label export of scenarios/RPLL_13-31_500.yaml started\n"
]
}
],
"source": [
"config = ExportConfig()\n",
"config = ExportConfig(strict=False)\n",
"config.dataset_name = \"train_dataset\"\n",
"config.output_directory = \"data\"\n",
"config.add_dataset_for_export(dataset_name=\"SAEZ_test\", # The name of the input dataset\n",
"os.makedirs(\"datasets\", exist_ok=True)\n",
"config.output_directory = \"datasets\"\n",
"config.add_dataset_for_export(dataset_name=\"train\", # The name of the input dataset\n",
" dataset_folder = \"scenarios\", # The folder where the input dataset should be \n",
" dataset_type = DatasetTypes.EARTH_STUDIO) # Type of dataset, either REAL or EARTH_STUDIO for now.\n",
" records_dirpath = \"records\", # Where generated samples are stored\n",
" dataset_type = DatasetTypes.EARTH_STUDIO) # Type of dataset, either REAL or EARTH_STUDIO for now.\n",
"# Once the configuration is exported, export can be started\n",
"export_datasets(config)"
]
Expand Down Expand Up @@ -94,6 +106,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down
15 changes: 8 additions & 7 deletions src/labeling/earth_studio_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def convert_label(image_shape, image_path, c, frame, runways_database, img_nb):
return label


def export_labels(yaml_config, google_export_dir=None, out_labels_file=None, out_images_dir=None):
def export_labels(yaml_config, strict, google_export_dir=None, out_labels_file=None, out_images_dir=None):
# Parse configuration file
# All the dependencies
print(f"Label export of {yaml_config} started")
Expand All @@ -69,7 +69,7 @@ def export_labels(yaml_config, google_export_dir=None, out_labels_file=None, out
out_images_dir = google_export_dir / "exported_images"

with open(yaml_config, 'r') as f:
c: dict = yaml.safe_load(f)
c: dict = yaml.full_load(f)
if 'poses' not in c.keys():
raise RuntimeError('The configuration file is not complete, missing poses.')

Expand Down Expand Up @@ -101,11 +101,12 @@ def export_labels(yaml_config, google_export_dir=None, out_labels_file=None, out
# print(f"Skipping missing image : {image_path}")
# continue
output_image_path = out_images_dir / image_path.name
label = convert_label(image_shape, output_image_path, c, frame_position, runways_database, i)
if not is_runway_image_valid(image_shape, label):
print(f"Skipping invalid image : {image_path}")
continue
labels.add_label(label)
for label in convert_label(image_shape, output_image_path, c, frame_position, runways_database, i):
ok, msg = is_runway_image_valid(image_shape, label, strict, debug=False)
if not ok:
# print(f"Skipping invalid runway for image {image_path} {label['runway']}: {msg}")
continue
labels.add_label(label)
shutil.copy(image_path, output_image_path)

# Generate label file
Expand Down
17 changes: 13 additions & 4 deletions src/labeling/export_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from yaml import safe_load
from enum import Enum
from pathlib import Path
from typing import Optional

from yaml import safe_load


# Ensure user do not misspell the types, or add not supported yet types
Expand All @@ -16,24 +19,30 @@ class ExportConfig:
"""
Class to load and store configuration for Lard dataset creation, merge and export.
"""
def __init__(self, yaml_file: str = None):
def __init__(self, yaml_file: str = None, strict: bool = True):
"""
:param yaml_file: path to yml file. If None, an empty config is generated.
:type yaml_file: str
:param strict: whether to force all runway corners to be part of the image.
:type strict: bool
"""
if yaml_file is None:
self.output_directory = None
self.dataset_name = None
self.strict = strict
self.included_datasets = dict()
else:
with open(yaml_file, 'r') as f:
self.__dict__.update(safe_load(f))
for value in self.included_datasets.values():
value["type"] = DatasetTypes(value["type"])

def add_dataset_for_export(self, dataset_name: str, dataset_folder: str, dataset_type: str, records_dirpath: Optional[Path] = None) -> None:
    """
    Add an acquisition dataset to the ones being exported and merged.

    :param dataset_name: name under which the dataset is registered.
    :param dataset_folder: folder where the input dataset is located.
    :param dataset_type: dataset type, coerced to a DatasetTypes member.
    :param records_dirpath: optional folder with the generated samples; stored as a Path when given.
    """
    entry = {"path": dataset_folder, "type": DatasetTypes(dataset_type)}
    if records_dirpath is not None:
        entry["records_dirpath"] = Path(records_dirpath)
    self.included_datasets[dataset_name] = entry
58 changes: 42 additions & 16 deletions src/labeling/generate_dataset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import functools
from multiprocessing import Pool
import argparse
import os
import glob
import shutil
from pathlib import Path
from typing import Union
from typing import Dict, Optional, Union
from src.labeling.earth_studio_export import export_labels
from src.labeling.json_export import from_json
from src.labeling.labels import Labels
Expand All @@ -17,6 +19,7 @@ def export_real_directory(folder_path: Union[str, Path], test_images_dir: Union[
"""
Parse and return the metadata for a real dataset and associated files.
"""
# FIXME: strict not used
labels = Labels()
img_list = []
for ext in IMG_TYPES:
Expand All @@ -31,25 +34,48 @@ def export_real_directory(folder_path: Union[str, Path], test_images_dir: Union[
return labels


def export_synthesized_directory(folder_path: Union[str, Path], test_images_dir: Union[str, Path]) -> dict:
def export_synthesized_directory_worker(strict, test_images_dir, records_dirpath, acquisition_path: Path) -> "Optional[Labels]":
    """
    Export the labels of a single acquisition (scenario) entry.

    Designed for use with ``functools.partial`` + ``multiprocessing.Pool``: the
    first three parameters are bound once, ``acquisition_path`` varies per scenario.

    :param strict: forwarded to export_labels; whether all runway corners must be inside the image.
    :param test_images_dir: output directory for the exported images.
    :param records_dirpath: directory holding the generated records; only used when
        acquisition_path is a bare ``.yaml`` file. NOTE(review): assumed non-None in
        that case — a ``.yaml`` entry with ``records_dirpath=None`` would raise; confirm callers.
    :param acquisition_path: either a scenario directory containing ``<name>.yaml``,
        or a ``.yaml`` scenario file next to a records directory.
    :return: the scenario labels, or None when the scenario is skipped.
    """
    scenario = acquisition_path.stem
    if os.path.isdir(acquisition_path):
        # Scenario folder: the yaml sits inside it and images are alongside.
        yaml_path = acquisition_path / f"{scenario}.yaml"
        google_export_dir = None
    elif acquisition_path.suffix == ".yaml":
        # Bare yaml scenario: images come from the matching records directory.
        yaml_path = acquisition_path
        google_export_dir = records_dirpath / scenario
        if not google_export_dir.exists():
            # No generated records for this scenario yet: nothing to export.
            return None
    else:
        # Not a scenario entry (e.g. stray file): ignore it.
        return None
    try:
        folder_labels = export_labels(yaml_path, strict, out_images_dir=test_images_dir, google_export_dir=google_export_dir)
    except KeyError as e:
        print(f"Missing data for scenario {scenario} ({e} was not found): scenario skipped ")
        return None
    except FileNotFoundError as e:
        print(f"File {e.filename} could not be found for scenario {scenario} : scenario skipped ")
        return None
    # Tag every label with its source scenario so merged datasets stay traceable.
    folder_labels.add_metadata("scenario", scenario)
    return folder_labels


def export_synthesized_directory(folder_path: Union[str, Path], strict: bool, test_images_dir: Union[str, Path], records_dirpath: Optional[Path] = None) -> "Labels":
    """
    Parse and return the labels for a synthesized google earth dataset.

    Each entry directly under folder_path (a scenario directory or a bare .yaml
    file) is exported by a separate worker process; scenarios the worker skips
    (it returns None) are silently dropped from the result.

    :param folder_path: folder containing one entry per scenario.
    :param strict: whether to force all runway corners to be part of the image.
    :param test_images_dir: directory where the exported images are written.
    :param records_dirpath: where generated samples are stored, used for bare .yaml scenarios.
    :return: the merged labels of all successfully exported scenarios.
    """
    labels = Labels()
    folder_path = Path(folder_path)
    # Enable multiprocessing: fan the per-scenario exports out over all cores.
    worker = functools.partial(export_synthesized_directory_worker, strict, test_images_dir, records_dirpath)
    with Pool(os.cpu_count()) as p:
        for folder_labels in p.imap_unordered(worker, folder_path.glob("*")):
            if folder_labels:
                labels += folder_labels

    return labels


Expand All @@ -73,7 +99,7 @@ def export_datasets(export_config: ExportConfig) -> None:
dataset_type = dataset_infos["type"]
dataset_path = dataset_infos["path"]
if dataset_type == DatasetTypes.EARTH_STUDIO:
dataset_labels = export_synthesized_directory(dataset_path, test_images_dir)
dataset_labels = export_synthesized_directory(dataset_path, export_config.strict, test_images_dir, records_dirpath=dataset_infos["records_dirpath"] if "records_dirpath" in dataset_infos else None)
elif dataset_type == DatasetTypes.REAL:
dataset_labels = export_real_directory(dataset_path, test_images_dir)
else:
Expand Down
26 changes: 17 additions & 9 deletions src/labeling/labels_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import Tuple

import numpy as np
import pandas as pd
from shapely.geometry import Polygon

from src.labeling.export_config import CORNERS_NAMES


Expand Down Expand Up @@ -40,7 +43,7 @@ def crop_bbox(bbox: np.array, width: int, height: int) -> np.array:
return np.array([x_min, y_min, x_max, y_max])


def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: bool = False) -> bool:
def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, strict: bool, debug: bool = False) -> Tuple[bool, str]:
"""
Check if a dataset image is valid :

Expand All @@ -51,6 +54,8 @@ def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: b
:type image_shape: tuple[int]
:param label: dataframe with labels
:type label: pd.DataFrame
:param strict: whether to force runways be entirely within the image.
:type strict: bool
:param debug: print debug information of why an image was deemed invalid.
:type debug: bool
:return: if the image is valid or not
Expand All @@ -61,16 +66,19 @@ def is_runway_image_valid(image_shape: tuple[int], label: pd.DataFrame, debug: b
image_poly = Polygon([(0, 0), (0, image_shape[0]), image_shape[:2], (image_shape[1], 0)])

if not runway_poly.intersects(image_poly):
msg = "Runway fully out of the image"
if debug:
print("Invalid image : runway fully out of the image")
print(msg)
# runway and image do not intersects and are not contained in one another, we continue to next runway
return False
return False, msg
if runway_poly.contains(image_poly):
msg = "Runway fully contains the image"
if debug:
print("Invalid image : runway fully contains the image")
return False
if not image_poly.contains(runway_poly):
print(msg)
return False, msg
if strict and not image_poly.contains(runway_poly):
msg = "At least one runway corner is outside the image "
if debug:
print("Invalid image : at least one runway corner is outside the image ")
return False
return True
print(msg)
return False, msg
return True, "Success"
2 changes: 1 addition & 1 deletion src/scenario/scenario_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, airport=None, runway=None, scenario_dir=None, yaml_file=None)
If a yaml_file is provided, the parameters airport, runway, scenario dir are not used,
and the ones from the yaml will be used instead.
"""
self.content = ScenarioContent(airport=airport, runways=runway)
self.content = ScenarioContent(airport=airport, runways=runway if isinstance(runway, list) else list(runway))
self.outputs = DefaultOutputs()
if scenario_dir is not None:
self.scenario_dir = Path(scenario_dir)
Expand Down