Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdk/batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ async def main():
type=JobType.TRANSCRIPTION,
transcription_config=TranscriptionConfig(
language="en",
operating_point=OperatingPoint.ENHANCED,
model=Model.ENHANCED,
enable_entities=True,
diarization="speaker",
),
Expand Down
2 changes: 1 addition & 1 deletion sdk/batch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ readme = "README.md"
authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }]
license = "MIT"
requires-python = ">=3.9"
dependencies = ["aiohttp", "aiofiles"]
dependencies = ["aiohttp", "aiofiles", "typing-extensions>=4.5.0"]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
Expand Down
6 changes: 6 additions & 0 deletions sdk/batch/speechmatics/batch/_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import asyncio
import logging
import os
import uuid
from typing import Any
Expand Down Expand Up @@ -191,6 +192,11 @@ async def submit_job(
transcription_config = transcription_config or TranscriptionConfig()
config = JobConfig(type=JobType.TRANSCRIPTION, transcription_config=transcription_config)

if config.transcription_config is not None and config.transcription_config.operating_point is not None:
logging.warning(
"TranscriptionConfig.operating_point is deprecated and will be removed in the future. Please use the model property instead."
)

# Check for fetch_data configuration
config_dict = config.to_dict()
has_fetch_data = "fetch_data" in config_dict
Expand Down
32 changes: 30 additions & 2 deletions sdk/batch/speechmatics/batch/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from enum import Enum
from typing import Any
from typing import Optional
from typing import cast
from warnings import warn

from typing_extensions import deprecated


class JobType(str, Enum):
Expand All @@ -38,13 +42,21 @@ class JobStatus(str, Enum):
EXPIRED = "expired"


@deprecated("Use Model instead")
class OperatingPoint(str, Enum):
    """Operating point options for transcription.

    Deprecated: use ``Model`` instead, which defines the same two values.
    """

    ENHANCED = "enhanced"
    STANDARD = "standard"


class Model(str, Enum):
    """Which model to use for transcription.

    Replaces the deprecated ``OperatingPoint`` enum. Members are also ``str``
    instances because the class derives from ``str``.
    """

    ENHANCED = "enhanced"
    STANDARD = "standard"


class NotificationContents(str, Enum):
"""Notification content options."""

Expand Down Expand Up @@ -75,6 +87,9 @@ class FormatType(str, Enum):
SRT = "srt"


# Sentinel default for TranscriptionConfig.model: __post_init__ compares against
# it by identity to detect that no model was supplied. The cast only satisfies
# the type checker; at runtime the value is a plain object().
_UNSET = cast(Model, object())


@dataclass
class TranscriptionConfig:
"""
Expand All @@ -83,7 +98,7 @@ class TranscriptionConfig:
Attributes:
language: ISO 639-1 language code (e.g., "en", "es", "fr").
defaults to "en"
operating_point: Which acoustic model to use.
model: Which acoustic model to use.
defaults to "enhanced"
output_locale: RFC-5646 language code for transcript output.
diarization: Type of diarization to use. Options: "none", "speaker".
Expand All @@ -103,7 +118,7 @@ class TranscriptionConfig:
"""

language: str = "en"
operating_point: OperatingPoint = OperatingPoint.ENHANCED
model: Model = _UNSET
output_locale: Optional[str] = None
diarization: Optional[str] = None
additional_vocab: Optional[list[dict[str, Any]]] = None
Expand All @@ -117,6 +132,19 @@ class TranscriptionConfig:
max_delay_mode: Optional[str] = None
transcript_filtering_config: Optional[TranscriptFilteringConfig] = None
audio_filtering_config: Optional[AudioFilteringConfig] = None
operating_point: Optional[OperatingPoint] = None

def __post_init__(self) -> None:
    """Validate the ``model`` / ``operating_point`` pair after dataclass init.

    Defaults ``model`` to ``Model.ENHANCED`` when neither field was supplied,
    and emits a ``DeprecationWarning`` when ``operating_point`` was supplied.

    Raises:
        ValueError: If both ``model`` and the deprecated ``operating_point``
            were supplied.
    """
    model_given = self.model is not _UNSET
    legacy_given = self.operating_point is not None

    if model_given and legacy_given:
        raise ValueError("Cannot specify both 'model' and 'operating_point'. Use 'model' instead.")

    if not legacy_given:
        if not model_given:
            self.model = Model.ENHANCED
        return

    # NOTE(review): on this path ``model`` is still the _UNSET sentinel; confirm
    # that to_dict() drops it before the config is serialized.
    #
    # stacklevel=3 attributes the warning to the code that constructed the
    # config: frame 1 is this method, frame 2 is the dataclass-generated
    # __init__, frame 3 is the caller. With stacklevel=2 the warning is pinned
    # on the generated __init__, which default warning filters hide.
    warn(
        "'operating_point' is deprecated, use 'model' instead.",
        DeprecationWarning,
        stacklevel=3,
    )

def to_dict(self) -> dict[str, Any]:
result: dict[str, Any] = {k: v for k, v in asdict(self).items() if v is not None}
Expand Down
3 changes: 2 additions & 1 deletion sdk/rt/speechmatics/rt/_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

_UNSET = object()


class AsyncClient(_BaseClient):
"""
Asynchronous client for Speechmatics real-time audio transcription.
Expand Down Expand Up @@ -195,7 +196,7 @@ async def force_end_of_utterance(self, *, timestamp: Optional[float] | object =
... await client.force_end_of_utterance()
"""

message: dict[str,Any] = {"message": ClientMessageType.FORCE_END_OF_UTTERANCE}
message: dict[str, Any] = {"message": ClientMessageType.FORCE_END_OF_UTTERANCE}

if timestamp is _UNSET:
# default: auto-set from audio_seconds_sent
Expand Down
6 changes: 6 additions & 0 deletions sdk/rt/speechmatics/rt/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
import contextlib
import json
import logging
import os
import uuid
from typing import Any
Expand Down Expand Up @@ -194,6 +195,11 @@ async def _start_recognition_session(
transcription_config = transcription_config or TranscriptionConfig()
audio_format = audio_format or AudioFormat()

if transcription_config.operating_point is not None:
logging.warning(
"TranscriptionConfig.operating_point is deprecated and will be removed in the future. Please use the model property instead."
)

start_recognition_message = build_start_recognition_message(
transcription_config=transcription_config,
audio_format=audio_format,
Expand Down
36 changes: 32 additions & 4 deletions sdk/rt/speechmatics/rt/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from enum import Enum
from typing import Any
from typing import Optional
from typing import cast
from warnings import deprecated
from warnings import warn


class AudioEncoding(str, Enum):
Expand All @@ -30,13 +33,21 @@ class AudioEncoding(str, Enum):
MULAW = "mulaw"


# NOTE(review): this module imports ``deprecated`` from ``warnings``, which only
# exists on Python 3.13+. Confirm the package's minimum supported Python
# version, or import it from ``typing_extensions`` instead.
@deprecated("Use Model instead")
class OperatingPoint(str, Enum):
    """Operating point options for transcription.

    Deprecated: use ``Model`` instead, which defines the same two values.
    """

    ENHANCED = "enhanced"
    STANDARD = "standard"


class Model(str, Enum):
    """Which model to use for transcription.

    Replaces the deprecated ``OperatingPoint`` enum. Members are also ``str``
    instances because the class derives from ``str``.
    """

    ENHANCED = "enhanced"
    STANDARD = "standard"


@dataclass
class AudioEventsConfig:
types: Optional[list[str]] = None
Expand Down Expand Up @@ -329,6 +340,9 @@ class SpeakerIdentifier:
speaker_identifiers: list[str] = field(default_factory=list)


# Sentinel default for TranscriptionConfig.model: __post_init__ compares against
# it by identity to detect that no model was supplied. The cast only satisfies
# the type checker; at runtime the value is a plain object().
_UNSET = cast(Model, object())
Comment thread
mnemitz marked this conversation as resolved.


@dataclass
class TranscriptionConfig:
"""
Expand All @@ -337,7 +351,7 @@ class TranscriptionConfig:
Attributes:
language: (Optional) ISO 639-1 language code (e.g., "en", "es", "fr").
Defaults to "en".
operating_point: (Optional) Which acoustic model to use.
model: (Optional) Which acoustic model to use.
Defaults to "enhanced".
output_locale: (Optional) RFC-5646 language code for transcript output (eg. "en-US").
Defaults to None.
Expand Down Expand Up @@ -373,7 +387,8 @@ class TranscriptionConfig:
Defaults to None.
channel_diarization_labels: (Optional) Configuration for channel diarization.
Defaults to None.

operating_point: (Deprecated) Legacy argument for specifying the operating point. Use `model` instead going forward
Defaults to None.

Examples:
Basic English transcription:
Expand All @@ -382,7 +397,7 @@ class TranscriptionConfig:
Spanish with partials enabled:
>>> config = TranscriptionConfig(
... language="es",
... operating_point="enhanced",
... model="enhanced",
... enable_partials=True
... )

Expand All @@ -399,7 +414,7 @@ class TranscriptionConfig:
"""

language: str = "en"
operating_point: OperatingPoint = OperatingPoint.ENHANCED
model: Model = _UNSET
output_locale: Optional[str] = None
diarization: Optional[str] = None
additional_vocab: Optional[list[dict[str, Any]]] = None
Expand All @@ -416,6 +431,19 @@ class TranscriptionConfig:
conversation_config: Optional[ConversationConfig] = None
ctrl: Optional[dict] = None
channel_diarization_labels: Optional[list[str]] = None
operating_point: Optional[OperatingPoint] = None

def __post_init__(self) -> None:
    """Validate the ``model`` / ``operating_point`` pair after dataclass init.

    Defaults ``model`` to ``Model.ENHANCED`` when neither field was supplied,
    and emits a ``DeprecationWarning`` when ``operating_point`` was supplied.

    Raises:
        ValueError: If both ``model`` and the deprecated ``operating_point``
            were supplied.
    """
    model_given = self.model is not _UNSET
    legacy_given = self.operating_point is not None

    if model_given and legacy_given:
        raise ValueError("Cannot specify both 'model' and 'operating_point'. Use 'model' instead.")

    if not legacy_given:
        if not model_given:
            self.model = Model.ENHANCED
        return

    # NOTE(review): on this path ``model`` is still the _UNSET sentinel; confirm
    # that to_dict() drops it before the config is serialized.
    #
    # stacklevel=3 attributes the warning to the code that constructed the
    # config: frame 1 is this method, frame 2 is the dataclass-generated
    # __init__, frame 3 is the caller. With stacklevel=2 the warning is pinned
    # on the generated __init__, which default warning filters hide.
    warn(
        "'operating_point' is deprecated, use 'model' instead.",
        DeprecationWarning,
        stacklevel=3,
    )

def to_dict(self) -> dict[str, Any]:
"""
Expand Down
Loading