feat: align_segments() — map Whisper timestamps to pyannote speakers
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
def align_segments(
    whisper_segs: list[dict],
    speaker_segs: list[tuple[float, float, str]],
) -> list[tuple[str, str]]:
    """Build a speaker-labelled transcript from Whisper segments.

    Each Whisper segment (a dict read via its ``"start"``, ``"end"`` and
    ``"text"`` keys) is attributed to the label chosen by ``_best_speaker``
    for its time span. Segments whose stripped text is empty are dropped.
    Adjacent kept segments with the same label are joined with a single
    space into one block.

    Args:
        whisper_segs: Transcription segments, in the order they should
            appear in the output.
        speaker_segs: ``(start, end, label)`` speaker turns.

    Returns:
        A list of ``(label, text)`` blocks in input order.
    """
    blocks: list[tuple[str, str]] = []
    for segment in whisper_segs:
        # The speaker lookup runs for every segment, including ones that
        # turn out to be blank, so the timing keys are always read.
        label = _best_speaker(segment["start"], segment["end"], speaker_segs)
        content = segment["text"].strip()
        if not content:
            continue
        if not blocks or blocks[-1][0] != label:
            # First block, or the speaker changed: start a new block.
            blocks.append((label, content))
            continue
        # Same speaker as the previous block: extend it in place.
        previous_text = blocks[-1][1]
        blocks[-1] = (label, previous_text + " " + content)
    return blocks
|
||||
|
||||
|
||||
def _best_speaker(
    start: float,
    end: float,
    speaker_segs: list[tuple[float, float, str]],
) -> str:
    """Return the label of the speaker turn that best matches ``[start, end]``.

    The turn with the largest positive time overlap wins; on equal overlap
    the turn that appears first in ``speaker_segs`` is kept.

    If no turn overlaps the interval (it is zero-length, or it falls in a
    gap between turns), the turn closest in time is used instead of an
    arbitrary default, again preferring the earliest on ties.

    Args:
        start: Start time of the interval to label.
        end: End time of the interval to label.
        speaker_segs: ``(start, end, label)`` speaker turns.

    Returns:
        The chosen label, or ``"SPEAKER_00"`` when ``speaker_segs`` is empty.
    """
    best_label = "SPEAKER_00"
    best_overlap = 0.0
    nearest_label = best_label
    nearest_gap = float("inf")
    for s_start, s_end, label in speaker_segs:
        overlap = min(end, s_end) - max(start, s_start)
        if overlap > best_overlap:
            best_overlap = overlap
            best_label = label
        elif overlap <= 0.0:
            # No shared time: remember how far away this turn is so a
            # gap/zero-length interval can still be given a real speaker.
            gap = max(0.0, s_start - end, start - s_end)
            if gap < nearest_gap:
                nearest_gap = gap
                nearest_label = label
    if best_overlap > 0.0:
        return best_label
    return nearest_label
|
||||
Reference in New Issue
Block a user