Skip to content

Commit

Permalink
Version bump to 1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Aug 30, 2021
1 parent a27725e commit 9f04ce0
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 30 deletions.
16 changes: 6 additions & 10 deletions docs/source/acoustics_encoding.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ The default source is Praat.
c.analyze_pitch(source='reaper')
If the source is `praat`, the Praat executable must be discoverable on the system path (i.e., a call of `praat` in a terminal works).
Likewise, if the source is `reaper`, the Reaper executable must be on the path or the full path to the Reaper executable must be specified.
If the source is `praat`, the Praat executable must be discoverable on the system path (i.e., a call of `praat` in a terminal works). Likewise, if the source is `reaper`, the Reaper executable must be on the path or the full path to the Reaper executable must be specified.


.. _pitch_algorithms:
Expand All @@ -75,20 +74,17 @@ Similar to the `source`, attribute, the `algorithm` can be toggled between :code
c.analyze_pitch(algorithm='speaker_adapted')
The :code:`"base"` algorithm uses a minimum pitch of 55 Hz and a maximum pitch of 480 Hz.
The :code:`"base"` algorithm uses a default minimum pitch of 50 Hz and a maximum pitch of 500 Hz, but these can be changed through the ``absolute_min_pitch`` and ``absolute_max_pitch`` parameters.

The :code:`"gendered"` algorithm checks whether a `Gender` property is available for speakers. If a speaker has a property
value that starts with `f` (i.e., female),
utterances by that speakers will use a minimum pitch of 100 Hz and a maximum pitch of 480 Hz. If they have a property
utterances by that speaker will use a minimum pitch of 100 Hz and a maximum pitch of 500 Hz. If they have a property
value of `m` (i.e., male),
utterances by that speakers will use a minimum pitch of 55 Hz and a maximum pitch of 400 Hz.
utterances by that speaker will use a minimum pitch of 50 Hz and a maximum pitch of 400 Hz.

The :code:`"speaker_adapted"` algorithm does two passes of pitch estimation. The first is identical to :code:`"base"`
and uses a minimum pitch of 55 Hz and a maximum pitch of 480 Hz.
This first pass is used to estimate by-speaker means and standard deviations of F0. The mean and SD for each speaker is
then used to generate per-speaker minimum and maximum pitch values.
The minimum pitch value is 3 standard deviations below the speaker mean, and the maximum pitch value is 3 standard
deviations above the speaker mean.
and uses a minimum pitch of 50 Hz and a maximum pitch of 500 Hz (or whatever the parameters have been set to).
This first pass is used to estimate by-speaker mean F0. Speaker-specific pitch floors and ceilings are then calculated by going the number of octaves specified by the ``adjusted_octaves`` parameter below and above each speaker's mean pitch. The default is 1, so the per-speaker pitch range will be one octave below and above the speaker's mean pitch.

.. _intensity_encoding:

Expand Down
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Version 1.2
* Upgraded InfluxDB compatibility to 1.8.9
* Changed Praat TextGrid handling to use praatio 4.1
* Phone parsing no longer includes blank intervals (i.e. silences), so preceding and following phone calculations have changed
* Updated the speaker-adjusted pitch algorithm to use octave-based minimum and maximum pitch values rather than the more permissive standard-deviation approach

Version 1.0
===========
Expand Down
2 changes: 1 addition & 1 deletion polyglotdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__ver_major__ = 1
__ver_minor__ = 2
__ver_patch__ = '0a1'
__ver_patch__ = 0
__version__ = f"{__ver_major__}.{__ver_minor__}.{__ver_patch__}"

__all__ = ['query', 'io', 'corpus', 'config', 'exceptions', 'CorpusContext', 'CorpusConfig']
Expand Down
41 changes: 27 additions & 14 deletions polyglotdb/acoustics/pitch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def analyze_utterance_pitch(corpus_context, utterance, source='praat', min_pitch
(u:{utt_type}:{corpus_name})-[:spoken_by]->(s),
(u)-[:spoken_in]->(d)
WHERE u.id = $utterance_id
RETURN u, d, r.channel as channel'''.format(corpus_name=corpus_context.cypher_safe_name, utt_type=utt_type)
RETURN u, d, r.channel as channel'''.format(corpus_name=corpus_context.cypher_safe_name,
utt_type=utt_type)
results = corpus_context.execute_cypher(statement, utterance_id=utterance_id)
segment_mapping = SegmentMapping()
for r in results:
Expand All @@ -50,7 +51,7 @@ def analyze_utterance_pitch(corpus_context, utterance, source='praat', min_pitch
if v['F0'] is None or v['F0'] <= 0:
continue
p = TimePoint(k)
p.add_value('F0', v['F0'])
p.add_value('F0', v['F0'])
track.add(p)
if 'pitch' not in corpus_context.hierarchy.acoustics:
corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'pitch', [('F0', float)])
Expand All @@ -74,8 +75,9 @@ def update_utterance_pitch_track(corpus_context, utterance, new_track):
(p:{phone_type}:{corpus_name})-[:contained_by*]->(u)
WHERE u.id = $utterance_id
SET u.pitch_last_edited = $date
RETURN u, d, r.channel as channel, s, collect(p) as p'''.format(corpus_name=corpus_context.cypher_safe_name,
utt_type=utt_type, phone_type=phone_type)
RETURN u, d, r.channel as channel, s, collect(p) as p'''.format(
corpus_name=corpus_context.cypher_safe_name,
utt_type=utt_type, phone_type=phone_type)
results = corpus_context.execute_cypher(statement, utterance_id=utterance_id, date=time_stamp)

for r in results:
Expand Down Expand Up @@ -136,23 +138,37 @@ def analyze_pitch(corpus_context,
source='praat',
algorithm='base',
call_back=None,
absolute_min_pitch=50,
absolute_max_pitch=500,
adjusted_octaves=1,
stop_check=None, multiprocessing=True):
"""
Parameters
----------
corpus_context : :class:`~polyglotdb.corpus.audio.AudioContext`
source : str
Program to use for analyzing pitch, either ``praat`` or ``reaper``
algorithm : str
Algorithm to use, ``base``, ``gendered``, or ``speaker_adjusted``
absolute_min_pitch : int
Absolute pitch floor
absolute_max_pitch : int
Absolute pitch ceiling
adjusted_octaves : int
How many octaves around the speaker's mean pitch to set the speaker adjusted pitch floor and ceiling
stop_check : callable
Function to check whether processing should stop early
call_back : callable
stop_check : callable
Function to report progress
multiprocessing : bool
Flag whether to use multiprocessing or threading
Returns
-------
"""
absolute_min_pitch = 50
absolute_max_pitch = 500

if not 'utterance' in corpus_context.hierarchy:
raise (Exception('Must encode utterances before pitch can be analyzed'))
segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING).grouped_mapping('speaker')
Expand Down Expand Up @@ -181,18 +197,17 @@ def analyze_pitch(corpus_context,
output = analyze_segments(v, pitch_function, stop_check=stop_check, multiprocessing=multiprocessing)

sum_pitch = 0
sum_square_pitch = 0
n = 0
for seg, track in output.items():
for t, v in track.items():
v = v['F0']

if v is not None and v > 0: # only voiced frames

n += 1
sum_pitch += v
sum_square_pitch += v * v
speaker_data[k] = [sum_pitch / n, math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) / (n * (n - 1)))]
mean_pitch = sum_pitch / n
speaker_data[k] = int(mean_pitch / math.pow(2, adjusted_octaves)), \
int( mean_pitch * math.pow(2, adjusted_octaves))

for i, ((speaker,), v) in enumerate(segment_mapping.items()):
if call_back is not None:
Expand All @@ -214,9 +229,7 @@ def analyze_pitch(corpus_context,
pitch_function = generate_pitch_function(source, min_pitch, max_pitch,
path=path)
elif algorithm == 'speaker_adjusted':
mean_pitch, sd_pitch = speaker_data[speaker]
min_pitch = int(mean_pitch - 3 * sd_pitch)
max_pitch = int(mean_pitch + 3 * sd_pitch)
min_pitch, max_pitch = speaker_data[speaker]
if min_pitch < absolute_min_pitch:
min_pitch = absolute_min_pitch
if max_pitch > absolute_max_pitch:
Expand Down
15 changes: 13 additions & 2 deletions polyglotdb/corpus/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ def generate_spectrogram(self, discourse, file_type='consonant', begin=None, end
signal, sr = self.load_waveform(discourse, file_type, begin, end)
return generate_spectrogram(signal, sr)

def analyze_pitch(self, source='praat', algorithm='base',
                  absolute_min_pitch=50, absolute_max_pitch=500, adjusted_octaves=1,
                  stop_check=None, call_back=None, multiprocessing=True):
    """
    Analyze pitch tracks for all utterances and save them to the database.

    Delegates to :func:`polyglotdb.acoustics.pitch.base.analyze_pitch`,
    forwarding all pitch-range parameters by keyword.

    Parameters
    ----------
    source : str
        Program to use for analyzing pitch, either ``praat`` or ``reaper``
    algorithm : str
        Algorithm to use, ``base``, ``gendered``, or ``speaker_adjusted``
    absolute_min_pitch : int
        Absolute pitch floor in Hz
    absolute_max_pitch : int
        Absolute pitch ceiling in Hz
    adjusted_octaves : int
        How many octaves below and above the speaker's mean pitch to set the
        speaker-adjusted pitch floor and ceiling
    stop_check : callable
        Function to check whether processing should stop early
    call_back : callable
        Function to report progress
    multiprocessing : bool
        Flag whether to use multiprocessing or threading
    """
    # Pass everything by keyword so positional order in the underlying
    # function cannot silently mismatch (the old call passed stop_check and
    # call_back positionally).
    analyze_pitch(self, source, algorithm,
                  stop_check=stop_check, call_back=call_back,
                  multiprocessing=multiprocessing,
                  absolute_min_pitch=absolute_min_pitch,
                  absolute_max_pitch=absolute_max_pitch,
                  adjusted_octaves=adjusted_octaves)

def analyze_utterance_pitch(self, utterance, source='praat', **kwargs):
"""
Expand Down Expand Up @@ -813,6 +822,8 @@ def _save_measurement_tracks(self, acoustic_name, tracks, speaker):
v = sanitize_value(value[name], type)
if v is not None:
fields[name] = v
elif type in [int, float]:
fields[name] = type(-1)
if not fields:
continue
if set_label is None:
Expand Down
4 changes: 2 additions & 2 deletions polyglotdb/query/annotations/attributes/acoustic.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,9 @@ def hydrate(self, corpus, utterance_id, begin, end):
undef_regions.append((x1, x[i + 1]))
new_data = RawTrack()
for o in self.attribute.output_columns:
y = [data[x1][o] for x1 in x]
y = [data[x1][o] for x1 in x if data[x1][o] and data[x1][o] > 0]
if len(y) > 1:
f = interpolate.interp1d([float(x1) for x1 in x], y)
f = interpolate.interp1d([float(x1) for x1 in x if data[x1][o] and data[x1][o] > 0], y)
for k in new_times:
out_time = k
if self.attribute.relative_time:
Expand Down
2 changes: 1 addition & 1 deletion polyglotdb/query/annotations/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def rows_for_csv(self):
for point in line.track:
line = {}
line.update(baseline)
line.update({'time': point.time})
line.update({'time': round(point.time, 4)})
line.update(point.select_values(self.track_columns))
yield line
else:
Expand Down

0 comments on commit 9f04ce0

Please sign in to comment.