Skip to content

Commit

Permalink
Merge pull request #12 from LCAV/doa_estimate_pipeline
Browse files Browse the repository at this point in the history
Doa estimate pipeline
  • Loading branch information
duembgen authored Sep 10, 2020
2 parents 74a7831 + 59be226 commit a6aea99
Show file tree
Hide file tree
Showing 11 changed files with 274 additions and 115 deletions.
1 change: 1 addition & 0 deletions src/audio_interfaces/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ set(msg_files
"msg/Signals.msg"
"msg/SignalsFreq.msg"
"msg/PoseRaw.msg"
"msg/DoaEstimates.msg"
)

rosidl_generate_interfaces(${PROJECT_NAME}
Expand Down
5 changes: 5 additions & 0 deletions src/audio_interfaces/msg/DoaEstimates.msg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
uint32 timestamp
uint8 n_estimates

# Estimate of doas in degrees
float32[] doa_estimates_deg
10 changes: 6 additions & 4 deletions src/audio_publisher/audio_publisher/file_publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
sys.path.append(os.path.abspath(current_dir + "/../../../crazyflie-audio/python/"))
import file_parser as fp

GT_DEGREES = 0
GT_DEGREES = 20
LOUDNESS = "high"
SOURCE = "white_noise"

Expand Down Expand Up @@ -57,6 +57,8 @@ def read_file_source(self, file_source):
signals_props, signals_source, signals_all = fp.read_recordings_14_7_20(gt_degrees=GT_DEGREES)
elif file_source == "recordings_9_7_20":
signals_props, signals_source, signals_all = fp.read_recordings_9_7_20(gt_degrees=GT_DEGREES)
else:
raise ValueError(file_source)

signals_full = signals_all
start_idx = fp.parameters[file_source]["time_index"]
Expand All @@ -80,11 +82,11 @@ def publish_signals(self):

def publish_position(self):
msg_pose_raw = PoseRaw()
msg_pose_raw.dx = 0.5
msg_pose_raw.dy = 0.1
msg_pose_raw.dx = 0.0
msg_pose_raw.dy = 0.0
msg_pose_raw.z = 0.0
msg_pose_raw.yaw_deg = 0.0
msg_pose_raw.source_direction_deg = 90.0 + GT_DEGREES
msg_pose_raw.source_direction_deg = 90.0 - GT_DEGREES
msg_pose_raw.timestamp = self.get_time_ms()
self.publisher_motion_pose_raw.publish(msg_pose_raw)

Expand Down
94 changes: 59 additions & 35 deletions src/audio_stack/audio_stack/correlator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,54 @@
sys.path.append(current_dir + "/../../../crazyflie-audio/python/")
from noise_cancellation import filter_iir_bandpass
from bin_selection import select_frequencies as embedded_select_frequencies
from algos_beamforming import select_frequencies

from audio_interfaces.msg import Signals, SignalsFreq, Correlations

# Denoising method.
# Denoising method
# METHOD_NOISE = "bandpass"
METHOD_NOISE = ""
METHOD_NOISE_DICT = {"bandpass": {"fmin": 100, "fmax": 300, "order": 3}}

# Windowing method. Available:
# Windowing method, available:
# - "" (no window)
# - "tukey" (flat + cosine on borders)
METHOD_WINDOW = "tukey"

# Frequency selection
THRUST = 43000
METHOD_FREQUENCY = "standard"
MIN_FREQ = 100
MAX_FREQ = 10000
DELTA_FREQ = 100

METHOD_FREQUENCY = "none"
METHOD_FREQUENCY_DICT = {
"standard": {
"min_freq": 100,
"max_freq": 10000,
"delta_freq": 100,
"none": {
"min_freq": MIN_FREQ,
"max_freq": MAX_FREQ,
"delta_freq": DELTA_FREQ,
"filter_snr": False,
"thrust": 0,
},
"snr": {
"min_freq": MIN_FREQ,
"max_freq": MAX_FREQ,
"delta_freq": DELTA_FREQ,
"filter_snr": True,
"thrust": 0,
},
"props": {
"min_freq": MIN_FREQ,
"max_freq": MAX_FREQ,
"delta_freq": DELTA_FREQ,
"filter_snr": False,
"thrust": THRUST,
},
"props_snr": {
"min_freq": MIN_FREQ,
"max_freq": MAX_FREQ,
"delta_freq": DELTA_FREQ,
"filter_snr": True,
"thrust": THRUST,
"thrust": THRUST,
},
"single": {
"min_freq": 200,
Expand All @@ -54,14 +78,9 @@
}
}

# Plotting parameters
MIN_YLIM = 1e-3 # set to -inf for no effect.
MAX_YLIM = 1e5 # set to inf for no effect.
MIN_FREQ = -np.inf # set to -inf for no effect
MAX_FREQ = +np.inf # set to inf for no effect

def verify_validity(params_dict):
assert all([key in params_dict.keys() for key in METHOD_FREQUENCY_DICT["standard"].keys()])
assert all([key in params_dict.keys() for key in METHOD_FREQUENCY_DICT[METHOD_FREQUENCY].keys()])
assert params_dict["min_freq"] <= params_dict["max_freq"]
assert params_dict["min_freq"] >= 0
assert params_dict["max_freq"] >= 0
Expand Down Expand Up @@ -106,8 +125,6 @@ def get_stft(signals, Fs, method_window, method_noise):


def create_correlations_message(signals_f, freqs, Fs, n_buffer, method_frequency):

# calculate Rs for chosen frequency bins
R = 1 / signals_f.shape[1] * signals_f[:, :, None] @ signals_f[:, None, :].conj()

msg = Correlations()
Expand All @@ -121,9 +138,7 @@ def create_correlations_message(signals_f, freqs, Fs, n_buffer, method_frequency

class Correlator(Node):
""" Node to subscribe to audio/signals or audio/signals_f and publish correlations.
"""

def __init__(self, plot_freq=False, plot_time=False):
super().__init__("correlator")

Expand All @@ -135,6 +150,9 @@ def __init__(self, plot_freq=False, plot_time=False):
self.subscription_signals_f = self.create_subscription(
SignalsFreq, "audio/signals_f", self.listener_callback_signals_f, 10
)
self.publisher_signals_f = self.create_publisher(
SignalsFreq, "audio/signals_f", 10
)
self.publisher_correlations = self.create_publisher(
Correlations, "audio/correlations", 10
)
Expand Down Expand Up @@ -196,6 +214,7 @@ def listener_callback_signals(self, msg):
signals_f, freqs = get_stft(
signals, msg.fs, self.methods["window"], self.methods["noise"]
) # n_samples x n_mics

if self.methods["frequency"] != "":
bins = embedded_select_frequencies(
msg.n_buffer,
Expand All @@ -206,19 +225,29 @@ def listener_callback_signals(self, msg):
freqs = freqs[bins]
signals_f = signals_f[bins]

self.process_signals_f(signals_f, freqs, msg.fs, msg.timestamp)
# create and publish frequency message
msg_freq = SignalsFreq()
msg_freq.fs = msg.fs
msg_freq.timestamp = msg.timestamp
msg_freq.n_mics = msg.n_mics
msg_freq.n_frequencies = len(freqs)
msg_freq.mic_positions = msg.mic_positions
msg_freq.frequencies = [int(f) for f in freqs]
# important: signals_f should be of shape n_mics x n_frequencies before flatten() is called.
msg_freq.signals_real_vect = list(np.real(signals_f.T).astype(float).flatten())
msg_freq.signals_imag_vect = list(np.imag(signals_f.T).astype(float).flatten())
self.publisher_signals_f.publish(msg_freq)

# check that the above processing pipeline does not
# take too long compared to the desired publish rate.
t2 = time.time()
processing_time = t2 - t1
self.get_logger().info(
f"listener_callback_signals: Publishing after processing time {processing_time:.2f}"
f"listener_callback_signals: Published signals_f after {processing_time:.2f}s"
)

def listener_callback_signals_f(self, msg):
t1 = time.time()
self.labels = [f"mic{i}" for i in range(msg.n_mics)]
self.mic_positions = np.array(msg.mic_positions).reshape((msg.n_mics, -1))

# convert msg format to numpy arrays
Expand All @@ -227,28 +256,23 @@ def listener_callback_signals_f(self, msg):
)
signals_f = signals_f.reshape((msg.n_mics, msg.n_frequencies)).T
freqs = np.array(msg.frequencies)
self.get_logger().info(f"frequencies: {freqs}")

self.process_signals_f(signals_f, freqs, msg.fs, msg.timestamp)
# create and publish correlations message
msg_new = create_correlations_message(
signals_f, freqs, msg.fs, signals_f.shape[0], self.methods["frequency"]
)
msg_new.mic_positions = list(self.mic_positions.astype(float).flatten())
msg_new.timestamp = msg.timestamp
self.publisher_correlations.publish(msg_new)

# check that the above processing pipeline does not
# take too long compared to the desired publish rate.
t2 = time.time()
processing_time = t2 - t1
self.get_logger().info(
f"listener_callback_signals_f: Publishing after processing time {processing_time}"
f"listener_callback_signals_f: Published correlations after {processing_time:.2f}s"
)

def process_signals_f(self, signals_f, freqs, Fs, timestamp):
msg_new = create_correlations_message(
signals_f, freqs, Fs, signals_f.shape[0], self.methods["frequency"]
)
msg_new.mic_positions = list(self.mic_positions.astype(float).flatten())
msg_new.timestamp = timestamp

# publishing
self.publisher_correlations.publish(msg_new)


def main(args=None):
rclpy.init(args=args)
Expand Down
37 changes: 34 additions & 3 deletions src/audio_stack/audio_stack/doa_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,13 @@
import matplotlib.pylab as plt
import numpy as np

from audio_interfaces.msg import Spectrum
from audio_interfaces.msg import Spectrum, DoaEstimates
from .spectrum_estimator import normalize_each_row, NORMALIZE

N_ESTIMATES = 3
COMBINATION_N = 5
COMBINATION_METHOD = "product"


class DoaEstimator(Node):
def __init__(self):
Expand All @@ -31,14 +37,17 @@ def __init__(self):
self.publisher_spectrum = self.create_publisher(
Spectrum, "audio/combined_spectrum", 10
)
self.publisher_doa = self.create_publisher(
DoaEstimates, "geometry/doa_estimates", 10
)
# all in degrees:
self.spectrum_orientation_list = []

# create ROS parameters that can be changed from command line.
self.declare_parameter("combination_n")
self.combination_n = 5
self.combination_n = COMBINATION_N
self.declare_parameter("combination_method")
self.combination_method = "sum"
self.combination_method = COMBINATION_METHOD
parameters = [
rclpy.parameter.Parameter(
"combination_method",
Expand Down Expand Up @@ -92,12 +101,34 @@ def listener_callback_spectrum(self, msg_spec):
spectra_shifted, axis=0
) # n_frequencies x n_angles

combined_spectrum = normalize_each_row(combined_spectrum, NORMALIZE)

# publish
msg_new = msg_spec
msg_new.spectrum_vect = list(combined_spectrum.astype(float).flatten())
self.publisher_spectrum.publish(msg_new)
self.get_logger().info(f"Published combined spectrum.")

# calculate and publish doa estimates
if self.combination_method == "product":
# need to make sure spectrum is not too small before multiplying.
final_spectrum = np.product(normalize_each_row(combined_spectrum, "zero_to_one"),
axis=0, keepdims=True) # n_angles
elif self.combination_method == "sum":
final_spectrum = np.sum(combined_spectrum, axis=0, keepdims=True) # n_angles

final_spectrum = normalize_each_row(final_spectrum, NORMALIZE)
angles = np.linspace(0, 360, msg_spec.n_angles)
sorted_indices = np.argsort(final_spectrum.flatten()) # sorts in ascending order
doa_estimates = angles[sorted_indices[-N_ESTIMATES:][::-1]]

msg_doa = DoaEstimates()
msg_doa.n_estimates = N_ESTIMATES
msg_doa.timestamp = msg_spec.timestamp
msg_doa.doa_estimates_deg = list(doa_estimates.astype(float).flatten())
self.publisher_doa.publish(msg_doa)
self.get_logger().info(f"Published estimates: {doa_estimates}.")


def main(args=None):
import os
Expand Down
59 changes: 38 additions & 21 deletions src/audio_stack/audio_stack/spectrum_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,34 @@

from audio_interfaces.msg import Correlations, Spectrum, PoseRaw
from .beam_former import BeamFormer
from .topic_synchronizer import TopicSynchronizer


# Beamforming method, available:
# - "das": delay-and-sum
# - "mvdr": minimum-variacne distortionless response
BF_METHOD = "das"

NORMALIZE = "zero_to_one"
#NORMALIZE = "sum_to_one"


def normalize_each_row(matrix, method="zero_to_one"):
if method == "zero_to_one":
normalized = (matrix - np.min(matrix, axis=1, keepdims=True)) / (np.max(matrix, axis=1, keepdims=True) - np.min(matrix, axis=1, keepdims=True))
#assert np.max(normalized) == 1.0
#assert np.min(normalized) == 0.0
return normalized
elif method == "sum_to_one":

# first make sure values are between 0 and 1 (otherwise division can lead to errors)
denom = np.max(matrix, axis=1, keepdims=True) - np.min(matrix, axis=1, keepdims=True)
matrix = (matrix - np.min(matrix, axis=1, keepdims=True)) / denom
sum_matrix = np.sum(matrix, axis=1, keepdims=True)
normalized = matrix / sum_matrix
np.testing.assert_allclose(np.sum(normalized, axis=1), 1.0, rtol=1e-5)
return normalized

ALLOWED_LAG_MS = 20 # allowed lag between pose and audio message

class SpectrumEstimator(Node):
def __init__(self, plot=False):
Expand All @@ -32,18 +58,17 @@ def __init__(self, plot=False):
self.subscription_correlations = self.create_subscription(
Correlations, "audio/correlations", self.listener_callback_correlations, 10
)
self.subscription_pose_raw = self.create_subscription(
PoseRaw, "geometry/pose_raw", self.listener_callback_pose_raw, 10
)
self.latest_time_and_orientation = None

self.raw_pose_synch = TopicSynchronizer(20)
self.subscription = self.create_subscription(PoseRaw, "geometry/pose_raw", self.raw_pose_synch.listener_callback, 10)

self.publisher_spectrum = self.create_publisher(Spectrum, "audio/spectrum", 10)

self.beam_former = None

# create ROS parameters that can be changed from command line.
self.declare_parameter("bf_method")
self.bf_method = "mvdr"
self.bf_method = BF_METHOD
parameters = [
rclpy.parameter.Parameter(
"bf_method", rclpy.Parameter.Type.STRING, self.bf_method
Expand Down Expand Up @@ -98,16 +123,13 @@ def listener_callback_correlations(self, msg_cor):
else:
raise ValueError(self.bf_method)

orientation = 0
latest = self.latest_time_and_orientation
if (latest is not None) and (
abs(msg_cor.timestamp - latest[0]) < ALLOWED_LAG_MS
):
orientation = latest[1]
elif latest is not None:
self.get_logger().warn(
f"Did not register valid position estimate: latest correlation at {msg_cor.timestamp}, latest orientation at {latest[0]}"
)
message = self.raw_pose_synch.get_latest_message(msg_cor.timestamp, self.get_logger())
if message is None:
orientation = 0
else:
orientation = message.yaw_deg

spectrum = normalize_each_row(spectrum, NORMALIZE)

# publish
msg_spec = Spectrum()
Expand All @@ -124,11 +146,6 @@ def listener_callback_correlations(self, msg_cor):
self.get_logger().info(f"Published spectrum after {processing_time:.2f}s.")


def listener_callback_pose_raw(self, msg_pose_raw):
self.get_logger().info(f"Processing pose: {msg_pose_raw.timestamp}.")
self.latest_time_and_orientation = (msg_pose_raw.timestamp, msg_pose_raw.yaw_deg)


def main(args=None):
import os

Expand Down
Loading

0 comments on commit a6aea99

Please sign in to comment.