feat: Diarizer class wrapping pyannote/speaker-diarization-3.1
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
import asyncio
|
||||
|
||||
|
||||
class Diarizer:
    """Speaker diarization via the ``pyannote/speaker-diarization-3.1`` pipeline.

    The pipeline is not created in ``__init__``; it is built the first time
    it is needed and then reused for every later call on the same instance.
    """

    def __init__(self, hf_token: str):
        """Remember the Hugging Face token used to fetch the pipeline.

        Args:
            hf_token: Access token passed to ``Pipeline.from_pretrained``.

        Raises:
            ValueError: If ``hf_token`` is empty (or otherwise falsy).
        """
        if not hf_token:
            raise ValueError("hf_token is required for diarization")
        self._hf_token = hf_token
        # Filled in by _load_pipeline() on first use.
        self._pipeline = None

    def _load_pipeline(self):
        """Return the cached pipeline, creating it on the first call.

        Returns:
            The object returned by ``Pipeline.from_pretrained``.

        Raises:
            RuntimeError: If ``Pipeline.from_pretrained`` returns ``None``.
        """
        if self._pipeline is None:
            # Imported inside the method, so pyannote is only needed once a
            # pipeline is actually loaded.
            from pyannote.audio import Pipeline

            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                use_auth_token=self._hf_token,
            )
            # pyannote.audio 3.x can return None (after printing a hint)
            # instead of raising when the gated model cannot be downloaded,
            # e.g. rejected token or model terms not accepted. Fail here with
            # a clear message rather than later with
            # "'NoneType' object is not callable", and keep the cache empty
            # so a subsequent call can retry.
            if pipeline is None:
                raise RuntimeError(
                    "Could not load 'pyannote/speaker-diarization-3.1': "
                    "Pipeline.from_pretrained returned None. Check that the "
                    "Hugging Face token is valid and that the model's user "
                    "conditions have been accepted."
                )
            self._pipeline = pipeline
        return self._pipeline

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        """Diarize an audio file without blocking the event loop.

        Both loading the pipeline and running it are handed to the running
        loop's default executor.

        Args:
            wav_path: Path of the audio file given to the pipeline.

        Returns:
            One ``(start, end, speaker)`` tuple per track yielded by
            ``annotation.itertracks(yield_label=True)``, in iteration order.
            (Start/end are presumably seconds -- confirm against pyannote.)

        Raises:
            RuntimeError: If the pipeline could not be loaded.
        """
        loop = asyncio.get_running_loop()
        pipeline = await loop.run_in_executor(None, self._load_pipeline)
        # run_in_executor forwards positional args, so no lambda is needed.
        annotation = await loop.run_in_executor(None, pipeline, wav_path)
        return [
            (turn.start, turn.end, speaker)
            for turn, _, speaker in annotation.itertracks(yield_label=True)
        ]
|
||||
Reference in New Issue
Block a user