33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
def align_segments(
    whisper_segs: list[dict],
    speaker_segs: list[tuple[float, float, str]],
) -> list[tuple[str, str]]:
    """Attribute transcript segments to speakers and merge consecutive runs.

    Each entry of ``whisper_segs`` is read through its ``"start"``,
    ``"end"`` and ``"text"`` keys. Segments whose text is empty after
    stripping whitespace are dropped. Every remaining segment is labelled
    via ``_best_speaker`` and adjacent segments that share a label are
    joined with a single space.

    Args:
        whisper_segs: Transcript segments, in the order they should appear
            in the output.
        speaker_segs: ``(start, end, label)`` speaker turns, passed through
            unchanged to ``_best_speaker``.

    Returns:
        ``(speaker_label, text)`` tuples, one per run of consecutive
        segments attributed to the same speaker.
    """
    # Gather the pieces of each run in a list and join once at the end, so
    # a long monologue does not re-copy its accumulated text per segment.
    runs: list[tuple[str, list[str]]] = []
    for seg in whisper_segs:
        text = seg["text"].strip()
        if not text:
            # Blank segments contribute nothing; skip them before paying
            # for the speaker lookup.
            continue
        speaker = _best_speaker(seg["start"], seg["end"], speaker_segs)
        if runs and runs[-1][0] == speaker:
            runs[-1][1].append(text)
        else:
            runs.append((speaker, [text]))
    return [(speaker, " ".join(parts)) for speaker, parts in runs]
|
|
|
|
|
|
def _best_speaker(
    start: float,
    end: float,
    speaker_segs: list[tuple[float, float, str]],
) -> str:
    """Return the speaker label that best matches the interval [start, end].

    The label of the speaker turn with the largest strictly positive
    overlap wins; on equal overlap the earlier entry in ``speaker_segs``
    is kept. If no turn overlaps (the interval lies in a gap between
    turns, or has zero duration), the label of the turn closest in time
    is returned instead, so such intervals are not all attributed to one
    fixed speaker. ``"SPEAKER_00"`` is returned only when ``speaker_segs``
    is empty.

    Args:
        start: Start of the interval to label.
        end: End of the interval to label.
        speaker_segs: ``(start, end, label)`` speaker turns.

    Returns:
        The chosen speaker label.
    """
    best_label = "SPEAKER_00"
    best_overlap = 0.0
    nearest_label = None
    nearest_gap = float("inf")
    for s_start, s_end, label in speaker_segs:
        # Positive: length of the shared span. Zero or negative: the two
        # intervals touch or are separated by ``-signed``.
        signed = min(end, s_end) - max(start, s_start)
        if signed > best_overlap:
            best_overlap = signed
            best_label = label
        elif signed <= 0.0 and -signed < nearest_gap:
            # Track the closest non-overlapping turn for the fallback;
            # strict ``<`` keeps the earliest turn on ties.
            nearest_gap = -signed
            nearest_label = label
    if best_overlap > 0.0 or nearest_label is None:
        return best_label
    return nearest_label
|