Merge pull request #12 from LCAV/doa_estimate_pipeline

Doa estimate pipeline
LCAV · Sep 10, 2020 · a6aea99 · a6aea99
2 parents 74a7831 + 59be226
commit a6aea99
Show file tree

Hide file tree

Showing 11 changed files with 274 additions and 115 deletions.
diff --git a/src/audio_interfaces/CMakeLists.txt b/src/audio_interfaces/CMakeLists.txt
@@ -39,6 +39,7 @@ set(msg_files
   "msg/Signals.msg"
   "msg/SignalsFreq.msg"
   "msg/PoseRaw.msg"
+  "msg/DoaEstimates.msg"
 )
 
 rosidl_generate_interfaces(${PROJECT_NAME}

diff --git a/src/audio_interfaces/msg/DoaEstimates.msg b/src/audio_interfaces/msg/DoaEstimates.msg
@@ -0,0 +1,5 @@
+uint32 timestamp
+uint8 n_estimates
+
+# Estimate of doas in degrees
+float32[] doa_estimates_deg
diff --git a/src/audio_publisher/audio_publisher/file_publisher.py b/src/audio_publisher/audio_publisher/file_publisher.py
@@ -18,7 +18,7 @@
 sys.path.append(os.path.abspath(current_dir + "/../../../crazyflie-audio/python/"))
 import file_parser as fp
 
-GT_DEGREES = 0
+GT_DEGREES = 20
 LOUDNESS = "high"
 SOURCE = "white_noise"
 
@@ -57,6 +57,8 @@ def read_file_source(self, file_source):
             signals_props, signals_source, signals_all = fp.read_recordings_14_7_20(gt_degrees=GT_DEGREES)
         elif file_source == "recordings_9_7_20":
             signals_props, signals_source, signals_all = fp.read_recordings_9_7_20(gt_degrees=GT_DEGREES)
+        else:
+            raise ValueError(file_source)
 
         signals_full = signals_all
         start_idx = fp.parameters[file_source]["time_index"]
@@ -80,11 +82,11 @@ def publish_signals(self):
 
     def publish_position(self):
         msg_pose_raw = PoseRaw()
-        msg_pose_raw.dx = 0.5 
-        msg_pose_raw.dy = 0.1
+        msg_pose_raw.dx = 0.0
+        msg_pose_raw.dy = 0.0
         msg_pose_raw.z = 0.0
         msg_pose_raw.yaw_deg = 0.0
-        msg_pose_raw.source_direction_deg = 90.0 + GT_DEGREES
+        msg_pose_raw.source_direction_deg = 90.0 - GT_DEGREES
         msg_pose_raw.timestamp = self.get_time_ms()
         self.publisher_motion_pose_raw.publish(msg_pose_raw)
 

diff --git a/src/audio_stack/audio_stack/correlator.py b/src/audio_stack/audio_stack/correlator.py
@@ -20,30 +20,54 @@
 sys.path.append(current_dir + "/../../../crazyflie-audio/python/")
 from noise_cancellation import filter_iir_bandpass
 from bin_selection import select_frequencies as embedded_select_frequencies
-from algos_beamforming import select_frequencies
 
 from audio_interfaces.msg import Signals, SignalsFreq, Correlations
 
-# Denoising method.
+# Denoising method
 # METHOD_NOISE = "bandpass"
 METHOD_NOISE = ""
 METHOD_NOISE_DICT = {"bandpass": {"fmin": 100, "fmax": 300, "order": 3}}
 
-# Windowing method. Available:
+# Windowing method, available:
 # - "" (no window)
 # - "tukey" (flat + cosine on borders)
 METHOD_WINDOW = "tukey"
 
 # Frequency selection
 THRUST = 43000
-METHOD_FREQUENCY = "standard"
+MIN_FREQ = 100
+MAX_FREQ = 10000
+DELTA_FREQ = 100
+
+METHOD_FREQUENCY = "none"
 METHOD_FREQUENCY_DICT = {
-    "standard": {
-        "min_freq": 100,
-        "max_freq": 10000,
-        "delta_freq": 100,
+    "none": {
+        "min_freq": MIN_FREQ,
+        "max_freq": MAX_FREQ,
+        "delta_freq": DELTA_FREQ,
+        "filter_snr": False,
+        "thrust": 0, 
+    },
+    "snr": {
+        "min_freq": MIN_FREQ,
+        "max_freq": MAX_FREQ,
+        "delta_freq": DELTA_FREQ,
+        "filter_snr": True,
+        "thrust": 0, 
+    },
+    "props": {
+        "min_freq": MIN_FREQ,
+        "max_freq": MAX_FREQ,
+        "delta_freq": DELTA_FREQ,
+        "filter_snr": False,
+        "thrust": THRUST, 
+    },
+    "props_snr": {
+        "min_freq": MIN_FREQ,
+        "max_freq": MAX_FREQ,
+        "delta_freq": DELTA_FREQ,
         "filter_snr": True,
-        "thrust": THRUST,
+        "thrust": THRUST, 
     },
     "single": {
         "min_freq": 200,
@@ -54,14 +78,9 @@
     }
 }
 
-# Plotting parameters
-MIN_YLIM = 1e-3  # set to -inf for no effect.
-MAX_YLIM = 1e5  # set to inf for no effect.
-MIN_FREQ = -np.inf # set to -inf for no effect
-MAX_FREQ = +np.inf  # set to inf for no effect
 
 def verify_validity(params_dict):
-    assert all([key in params_dict.keys() for key in METHOD_FREQUENCY_DICT["standard"].keys()])
+    assert all([key in params_dict.keys() for key in METHOD_FREQUENCY_DICT[METHOD_FREQUENCY].keys()])
     assert params_dict["min_freq"] <= params_dict["max_freq"]
     assert params_dict["min_freq"] >= 0
     assert params_dict["max_freq"] >= 0
@@ -106,8 +125,6 @@ def get_stft(signals, Fs, method_window, method_noise):
 
 
 def create_correlations_message(signals_f, freqs, Fs, n_buffer, method_frequency):
-
-    # calculate Rs for chosen frequency bins
     R = 1 / signals_f.shape[1] * signals_f[:, :, None] @ signals_f[:, None, :].conj()
 
     msg = Correlations()
@@ -121,9 +138,7 @@ def create_correlations_message(signals_f, freqs, Fs, n_buffer, method_frequency
 
 class Correlator(Node):
     """ Node to subscribe to audio/signals or audio/signals_f and publish correlations.
-
     """
-
     def __init__(self, plot_freq=False, plot_time=False):
         super().__init__("correlator")
 
@@ -135,6 +150,9 @@ def __init__(self, plot_freq=False, plot_time=False):
         self.subscription_signals_f = self.create_subscription(
             SignalsFreq, "audio/signals_f", self.listener_callback_signals_f, 10
         )
+        self.publisher_signals_f = self.create_publisher(
+            SignalsFreq, "audio/signals_f", 10
+        )
         self.publisher_correlations = self.create_publisher(
             Correlations, "audio/correlations", 10
         )
@@ -196,6 +214,7 @@ def listener_callback_signals(self, msg):
         signals_f, freqs = get_stft(
             signals, msg.fs, self.methods["window"], self.methods["noise"]
         )  # n_samples x n_mics
+
         if self.methods["frequency"] != "":
             bins = embedded_select_frequencies(
                 msg.n_buffer,
@@ -206,19 +225,29 @@ def listener_callback_signals(self, msg):
             freqs = freqs[bins]
             signals_f = signals_f[bins]
 
-        self.process_signals_f(signals_f, freqs, msg.fs, msg.timestamp)
+        # create and publish frequency message
+        msg_freq = SignalsFreq()
+        msg_freq.fs = msg.fs
+        msg_freq.timestamp = msg.timestamp
+        msg_freq.n_mics = msg.n_mics
+        msg_freq.n_frequencies = len(freqs)
+        msg_freq.mic_positions = msg.mic_positions
+        msg_freq.frequencies = [int(f) for f in freqs]
+        # important: signals_f should be of shape n_mics x n_frequencies before flatten() is called.
+        msg_freq.signals_real_vect = list(np.real(signals_f.T).astype(float).flatten())
+        msg_freq.signals_imag_vect = list(np.imag(signals_f.T).astype(float).flatten())
+        self.publisher_signals_f.publish(msg_freq)
 
         # check that the above processing pipeline does not
         # take too long compared to the desired publish rate.
         t2 = time.time()
         processing_time = t2 - t1
         self.get_logger().info(
-                f"listener_callback_signals: Publishing after processing time {processing_time:.2f}"
+                f"listener_callback_signals: Published signals_f after {processing_time:.2f}s"
         )
 
     def listener_callback_signals_f(self, msg):
         t1 = time.time()
-        self.labels = [f"mic{i}" for i in range(msg.n_mics)]
         self.mic_positions = np.array(msg.mic_positions).reshape((msg.n_mics, -1))
 
         # convert msg format to numpy arrays
@@ -227,28 +256,23 @@ def listener_callback_signals_f(self, msg):
         )
         signals_f = signals_f.reshape((msg.n_mics, msg.n_frequencies)).T
         freqs = np.array(msg.frequencies)
-        self.get_logger().info(f"frequencies: {freqs}")
 
-        self.process_signals_f(signals_f, freqs, msg.fs, msg.timestamp)
+        # create and publish correlations message
+        msg_new = create_correlations_message(
+            signals_f, freqs, msg.fs, signals_f.shape[0], self.methods["frequency"]
+        )
+        msg_new.mic_positions = list(self.mic_positions.astype(float).flatten())
+        msg_new.timestamp = msg.timestamp
+        self.publisher_correlations.publish(msg_new)
 
         # check that the above processing pipeline does not
         # take too long compared to the desired publish rate.
         t2 = time.time()
         processing_time = t2 - t1
         self.get_logger().info(
-            f"listener_callback_signals_f: Publishing after processing time {processing_time}"
+                f"listener_callback_signals_f: Published correlations after {processing_time:.2f}s"
         )
 
-    def process_signals_f(self, signals_f, freqs, Fs, timestamp):
-        msg_new = create_correlations_message(
-            signals_f, freqs, Fs, signals_f.shape[0], self.methods["frequency"]
-        )
-        msg_new.mic_positions = list(self.mic_positions.astype(float).flatten())
-        msg_new.timestamp = timestamp
-
-        # publishing
-        self.publisher_correlations.publish(msg_new)
-
 
 def main(args=None):
     rclpy.init(args=args)

diff --git a/src/audio_stack/audio_stack/doa_estimator.py b/src/audio_stack/audio_stack/doa_estimator.py
@@ -19,7 +19,13 @@
 import matplotlib.pylab as plt
 import numpy as np
 
-from audio_interfaces.msg import Spectrum
+from audio_interfaces.msg import Spectrum, DoaEstimates
+from .spectrum_estimator import normalize_each_row, NORMALIZE
+
+N_ESTIMATES = 3
+COMBINATION_N = 5
+COMBINATION_METHOD = "product"
+
 
 class DoaEstimator(Node):
     def __init__(self):
@@ -31,14 +37,17 @@ def __init__(self):
         self.publisher_spectrum = self.create_publisher(
             Spectrum, "audio/combined_spectrum", 10
         )
+        self.publisher_doa = self.create_publisher(
+            DoaEstimates, "geometry/doa_estimates", 10
+        )
         # all in degrees:
         self.spectrum_orientation_list = []
 
         # create ROS parameters that can be changed from command line.
         self.declare_parameter("combination_n")
-        self.combination_n = 5
+        self.combination_n = COMBINATION_N
         self.declare_parameter("combination_method")
-        self.combination_method = "sum"
+        self.combination_method = COMBINATION_METHOD 
         parameters = [
             rclpy.parameter.Parameter(
                 "combination_method",
@@ -92,12 +101,34 @@ def listener_callback_spectrum(self, msg_spec):
                 spectra_shifted, axis=0
             )  # n_frequencies x n_angles
 
+        combined_spectrum = normalize_each_row(combined_spectrum, NORMALIZE)
+
         # publish
         msg_new = msg_spec
         msg_new.spectrum_vect = list(combined_spectrum.astype(float).flatten())
         self.publisher_spectrum.publish(msg_new)
         self.get_logger().info(f"Published combined spectrum.")
 
+        # calculate and publish doa estimates
+        if self.combination_method == "product":
+            # need to make sure spectrum is not too small before multiplying.
+            final_spectrum = np.product(normalize_each_row(combined_spectrum, "zero_to_one"), 
+                    axis=0, keepdims=True)  # n_angles
+        elif self.combination_method == "sum":
+            final_spectrum = np.sum(combined_spectrum, axis=0, keepdims=True)  # n_angles
+
+        final_spectrum = normalize_each_row(final_spectrum, NORMALIZE)
+        angles = np.linspace(0, 360, msg_spec.n_angles)
+        sorted_indices = np.argsort(final_spectrum.flatten()) # sorts in ascending order
+        doa_estimates = angles[sorted_indices[-N_ESTIMATES:][::-1]]
+
+        msg_doa = DoaEstimates()
+        msg_doa.n_estimates = N_ESTIMATES
+        msg_doa.timestamp = msg_spec.timestamp
+        msg_doa.doa_estimates_deg = list(doa_estimates.astype(float).flatten())
+        self.publisher_doa.publish(msg_doa)
+        self.get_logger().info(f"Published estimates: {doa_estimates}.")
+
 
 def main(args=None):
     import os

diff --git a/src/audio_stack/audio_stack/spectrum_estimator.py b/src/audio_stack/audio_stack/spectrum_estimator.py
@@ -22,8 +22,34 @@
 
 from audio_interfaces.msg import Correlations, Spectrum, PoseRaw
 from .beam_former import BeamFormer
+from .topic_synchronizer import TopicSynchronizer
+
+
+# Beamforming method, available: 
+# - "das": delay-and-sum
+# - "mvdr": minimum-variacne distortionless response
+BF_METHOD = "das"
+
+NORMALIZE = "zero_to_one"
+#NORMALIZE = "sum_to_one"
+
+
+def normalize_each_row(matrix, method="zero_to_one"):
+    if method == "zero_to_one":
+        normalized =  (matrix - np.min(matrix, axis=1, keepdims=True)) / (np.max(matrix, axis=1, keepdims=True) - np.min(matrix, axis=1, keepdims=True))
+        #assert np.max(normalized) == 1.0
+        #assert np.min(normalized) == 0.0
+        return normalized
+    elif method == "sum_to_one":
+
+        # first make sure values are between 0 and 1 (otherwise division can lead to errors)
+        denom = np.max(matrix, axis=1, keepdims=True) - np.min(matrix, axis=1, keepdims=True)
+        matrix =  (matrix - np.min(matrix, axis=1, keepdims=True)) / denom 
+        sum_matrix = np.sum(matrix, axis=1, keepdims=True)
+        normalized = matrix / sum_matrix
+        np.testing.assert_allclose(np.sum(normalized, axis=1), 1.0, rtol=1e-5)
+        return normalized
 
-ALLOWED_LAG_MS = 20  # allowed lag between pose and audio message
 
 class SpectrumEstimator(Node):
     def __init__(self, plot=False):
@@ -32,18 +58,17 @@ def __init__(self, plot=False):
         self.subscription_correlations = self.create_subscription(
             Correlations, "audio/correlations", self.listener_callback_correlations, 10
         )
-        self.subscription_pose_raw = self.create_subscription(
-            PoseRaw, "geometry/pose_raw", self.listener_callback_pose_raw, 10
-        )
-        self.latest_time_and_orientation = None
+
+        self.raw_pose_synch = TopicSynchronizer(20)
+        self.subscription = self.create_subscription(PoseRaw, "geometry/pose_raw", self.raw_pose_synch.listener_callback, 10)
 
         self.publisher_spectrum = self.create_publisher(Spectrum, "audio/spectrum", 10)
 
         self.beam_former = None
 
         # create ROS parameters that can be changed from command line.
         self.declare_parameter("bf_method")
-        self.bf_method = "mvdr"
+        self.bf_method = BF_METHOD
         parameters = [
             rclpy.parameter.Parameter(
                 "bf_method", rclpy.Parameter.Type.STRING, self.bf_method
@@ -98,16 +123,13 @@ def listener_callback_correlations(self, msg_cor):
         else:
             raise ValueError(self.bf_method)
 
-        orientation = 0
-        latest = self.latest_time_and_orientation
-        if (latest is not None) and (
-            abs(msg_cor.timestamp - latest[0]) < ALLOWED_LAG_MS
-        ):
-            orientation = latest[1]
-        elif latest is not None:
-            self.get_logger().warn(
-                f"Did not register valid position estimate: latest correlation at {msg_cor.timestamp}, latest orientation at {latest[0]}"
-            )
+        message = self.raw_pose_synch.get_latest_message(msg_cor.timestamp, self.get_logger())
+        if message is None:
+            orientation = 0
+        else:
+            orientation = message.yaw_deg
+
+        spectrum = normalize_each_row(spectrum, NORMALIZE)
 
         # publish
         msg_spec = Spectrum()
@@ -124,11 +146,6 @@ def listener_callback_correlations(self, msg_cor):
         self.get_logger().info(f"Published spectrum after {processing_time:.2f}s.")
 
 
-    def listener_callback_pose_raw(self, msg_pose_raw):
-        self.get_logger().info(f"Processing pose: {msg_pose_raw.timestamp}.")
-        self.latest_time_and_orientation = (msg_pose_raw.timestamp, msg_pose_raw.yaw_deg)
-
-
 def main(args=None):
     import os