# tueit_Transkriptor/tests/test_transcription.py

import asyncio
import pytest
from unittest.mock import MagicMock
def test_transcription_engine_is_singleton():
    """The ``engine`` object exported by ``transcription`` is a ``TranscriptionEngine``."""
    from transcription import TranscriptionEngine, engine as shared_engine

    assert isinstance(shared_engine, TranscriptionEngine)
def test_transcribe_file_calls_whisper(tmp_path):
    """Check that ``transcribe_file`` forwards the path and language to the model.

    A ``MagicMock`` is injected as ``_model``; its single segment carries the
    text ``" Hallo Welt"`` and the test expects ``"Hallo Welt"`` back.
    """
    from transcription import TranscriptionEngine

    # Placeholder file: 100 zero bytes under the pytest temp directory.
    audio_path = tmp_path / "test.wav"
    audio_path.write_bytes(b"\x00" * 100)

    segment = MagicMock()
    segment.text = " Hallo Welt"
    fake_model = MagicMock()
    # The mocked transcribe() yields a (segments, info) pair.
    fake_model.transcribe.return_value = ([segment], MagicMock())

    engine_under_test = TranscriptionEngine()
    engine_under_test._model = fake_model

    transcript = asyncio.run(
        engine_under_test.transcribe_file(str(audio_path), language="de")
    )

    assert transcript == "Hallo Welt"
    fake_model.transcribe.assert_called_once_with(str(audio_path), language="de")
@pytest.mark.asyncio
async def test_transcribe_uses_remote_when_base_url_set(tmp_path):
    """With ``base_url`` given, the result is the ``text`` of the mocked HTTP reply.

    The POST to ``/v1/audio/transcriptions`` on the given host is intercepted
    with ``respx``, so no real network request is made.
    """
    import struct
    import wave

    import httpx
    import respx
    from transcription import TranscriptionEngine

    # Write a mono, 16-bit, 16 kHz WAV holding 100 zero-valued samples.
    audio_path = tmp_path / "test.wav"
    with wave.open(str(audio_path), "wb") as writer:
        writer.setnchannels(1)
        writer.setsampwidth(2)
        writer.setframerate(16000)
        writer.writeframes(struct.pack("<100h", *([0] * 100)))

    engine_under_test = TranscriptionEngine()
    canned_reply = httpx.Response(200, json={"text": "Hallo Welt"})

    with respx.mock:
        respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
            return_value=canned_reply
        )
        transcript = await engine_under_test.transcribe_file(
            str(audio_path),
            language="de",
            model_name="large-v3",
            device="auto",
            base_url="http://beastix:8000",
        )

    assert transcript == "Hallo Welt"
def test_transcribe_file_returns_segments_when_requested(tmp_path):
    """With ``with_segments=True`` the result is a list of segment dicts.

    The injected mock model yields one segment (text ``" Hallo Welt"``,
    start ``0.0``, end ``1.5``); the first returned entry is expected to carry
    ``"Hallo Welt"`` plus the same start and end values.
    """
    from transcription import TranscriptionEngine

    # Placeholder file: 100 zero bytes under the pytest temp directory.
    audio_path = tmp_path / "test.wav"
    audio_path.write_bytes(b"\x00" * 100)

    segment = MagicMock()
    segment.text = " Hallo Welt"
    segment.start = 0.0
    segment.end = 1.5
    fake_model = MagicMock()
    # The mocked transcribe() yields a (segments, info) pair.
    fake_model.transcribe.return_value = ([segment], MagicMock())

    engine_under_test = TranscriptionEngine()
    engine_under_test._model = fake_model

    segments = asyncio.run(
        engine_under_test.transcribe_file(
            str(audio_path), language="de", with_segments=True
        )
    )

    assert isinstance(segments, list)
    first = segments[0]
    assert first["text"] == "Hallo Welt"
    assert first["start"] == 0.0
    assert first["end"] == 1.5
@pytest.mark.asyncio
async def test_transcribe_remote_returns_segments_when_requested(tmp_path):
    """With ``base_url`` and ``with_segments=True`` the result is a list of segments.

    The mocked HTTP reply carries one segment whose text is ``" Hallo Welt"``;
    the first returned entry is expected to have the text ``"Hallo Welt"``.
    """
    import struct
    import wave

    import httpx
    import respx
    from transcription import TranscriptionEngine

    # Write a mono, 16-bit, 16 kHz WAV holding 100 zero-valued samples.
    audio_path = tmp_path / "test.wav"
    with wave.open(str(audio_path), "wb") as writer:
        writer.setnchannels(1)
        writer.setsampwidth(2)
        writer.setframerate(16000)
        writer.writeframes(struct.pack("<100h", *([0] * 100)))

    engine_under_test = TranscriptionEngine()
    reply_payload = {
        "text": "Hallo Welt",
        "segments": [{"start": 0.0, "end": 1.5, "text": " Hallo Welt"}],
    }

    with respx.mock:
        respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
            return_value=httpx.Response(200, json=reply_payload)
        )
        segments = await engine_under_test.transcribe_file(
            str(audio_path),
            language="de",
            model_name="large-v3",
            device="auto",
            base_url="http://beastix:8000",
            with_segments=True,
        )

    assert isinstance(segments, list)
    assert segments[0]["text"] == "Hallo Welt"