feat: Diarizer class wrapping pyannote/speaker-diarization-3.1

This commit is contained in:
2026-04-02 00:59:50 +02:00
parent 47909637a8
commit 1a9d0eacc2
3 changed files with 66 additions and 0 deletions
+27
View File
@@ -0,0 +1,27 @@
import asyncio
class Diarizer:
    """Speaker diarization backed by ``pyannote/speaker-diarization-3.1``.

    The pyannote pipeline is created lazily on first use and cached on the
    instance, so constructing a ``Diarizer`` does not import pyannote or
    load any model.
    """

    _MODEL_ID = "pyannote/speaker-diarization-3.1"

    def __init__(self, hf_token: str):
        """Store the Hugging Face token used to fetch the pipeline.

        Args:
            hf_token: Hugging Face access token; must be non-empty.

        Raises:
            ValueError: If ``hf_token`` is empty or otherwise falsy.
        """
        if not hf_token:
            raise ValueError("hf_token is required for diarization")
        self._hf_token = hf_token
        # Populated by _load_pipeline() on first use.
        self._pipeline = None

    def _load_pipeline(self):
        """Return the cached pipeline, creating it on the first call.

        This is a blocking call; ``diarize`` runs it in an executor thread.
        There is no locking, so two overlapping first calls may each build
        a pipeline (the last one assigned wins).

        Raises:
            RuntimeError: If ``Pipeline.from_pretrained`` returns ``None``
                instead of a pipeline object.
        """
        if self._pipeline is None:
            # Imported here rather than at module level so pyannote is only
            # required once diarization is actually requested.
            from pyannote.audio import Pipeline

            pipeline = Pipeline.from_pretrained(
                self._MODEL_ID,
                use_auth_token=self._hf_token,
            )
            # from_pretrained can hand back None instead of raising when the
            # model cannot be fetched (e.g. rejected token or model terms not
            # accepted). Fail here with a clear message rather than letting
            # diarize() crash later with "'NoneType' object is not callable".
            if pipeline is None:
                raise RuntimeError(
                    f"Could not load {self._MODEL_ID}: "
                    "Pipeline.from_pretrained returned None. Check that the "
                    "Hugging Face token is valid and that the model's user "
                    "conditions have been accepted."
                )
            # Only cache a usable pipeline so a later call can retry.
            self._pipeline = pipeline
        return self._pipeline

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        """Run diarization on ``wav_path`` without blocking the event loop.

        Both pipeline loading and inference are dispatched to the running
        loop's default executor.

        Args:
            wav_path: Path to the audio file, passed to the pipeline as-is.

        Returns:
            One ``(start, end, speaker)`` tuple per track yielded by the
            pipeline's annotation, in the order the annotation yields them.

        Raises:
            RuntimeError: If the pipeline could not be loaded.
        """
        loop = asyncio.get_running_loop()
        pipeline = await loop.run_in_executor(None, self._load_pipeline)
        annotation = await loop.run_in_executor(None, pipeline, wav_path)
        return [
            (turn.start, turn.end, speaker)
            for turn, _, speaker in annotation.itertracks(yield_label=True)
        ]