import asyncio


class Diarizer:
    """Speaker diarization backed by a lazily loaded pyannote pipeline.

    The pipeline is imported and loaded on first use and cached on the
    instance. Loading and inference both run in the event loop's default
    executor so the loop itself is not blocked.

    Note: the lazy load is not synchronized; concurrent first calls to
    ``diarize`` may each load the pipeline, with the last one cached.
    """

    def __init__(self, hf_token: str):
        """Store the Hugging Face token used to fetch the pipeline.

        Raises:
            ValueError: If ``hf_token`` is empty or otherwise falsy.
        """
        if not hf_token:
            raise ValueError("hf_token is required for diarization")
        self._hf_token = hf_token
        self._pipeline = None

    def _load_pipeline(self):
        """Return the cached pipeline, loading it on the first call.

        Raises:
            RuntimeError: If ``Pipeline.from_pretrained`` returns ``None``.
        """
        if self._pipeline is None:
            # Imported lazily so this module can be imported without pyannote
            # being loaded until diarization is actually requested.
            from pyannote.audio import Pipeline

            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=self._hf_token,
            )
            # from_pretrained may return None (rather than raise) when the
            # checkpoint cannot be fetched; fail here with a clear message
            # instead of a later "'NoneType' object is not callable".
            if pipeline is None:
                raise RuntimeError(
                    "Could not load 'pyannote/speaker-diarization-3.1'; check "
                    "that the Hugging Face token is valid and that the model's "
                    "user conditions have been accepted"
                )
            self._pipeline = pipeline
        return self._pipeline

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        """Run diarization on ``wav_path`` without blocking the event loop.

        Args:
            wav_path: Path of the audio file handed to the pipeline.

        Returns:
            One ``(start, end, speaker)`` tuple per track yielded by the
            resulting annotation, in the order the annotation yields them.

        Raises:
            RuntimeError: If the pipeline could not be loaded.
        """
        loop = asyncio.get_running_loop()
        pipeline = await loop.run_in_executor(None, self._load_pipeline)
        result = await loop.run_in_executor(None, pipeline, wav_path)
        # pyannote 4.x returns DiarizeOutput; older versions return Annotation directly
        annotation = getattr(result, "speaker_diarization", result)
        return [
            (turn.start, turn.end, speaker)
            for turn, _, speaker in annotation.itertracks(yield_label=True)
        ]