Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions livekit-plugins/livekit-plugins-simplismart/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Simplismart Plugin for LiveKit Agents

Support for [Simplismart.ai](https://simplismart.ai)'s voice AI services in LiveKit Agents.

## Features

- **Speech-to-Text (STT)**: Convert audio to text using Simplismart's STT models. See the [STT docs](https://docs.livekit.io/agents/integrations/stt/simplismart/) for more information.
- **Text-to-Speech (TTS)**: Convert text to audio using Simplismart's TTS models. See the [TTS docs](https://docs.livekit.io/agents/integrations/tts/simplismart/) for more information.

## Installation

```bash
pip install livekit-plugins-simplismart
```

## Prerequisites

You'll need an API key from Simplismart.ai. Provide it through the `SIMPLISMART_API_KEY` environment variable, or pass it as the `api_key` argument when constructing `STT` or `TTS`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2025 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""SimpliSmart plugin for LiveKit Agents

Support for speech-to-text and text-to-speech with [SimpliSmart](https://simplismart.ai/).

SimpliSmart provides high-quality STT and TTS for Indian languages.

For API access, visit https://simplismart.ai/
"""

from .stt import STT
from .tts import TTS
from .version import __version__

__all__ = ["STT", "TTS", "__version__"]


from livekit.agents import Plugin

from .log import logger


class SimplismartPlugin(Plugin):
    """Plugin descriptor for the Simplismart integration."""

    def __init__(self) -> None:
        # Hand the base class this module's name, version, package and logger.
        super().__init__(__name__, __version__, __package__, logger)


# Register one plugin instance as a side effect of importing the package.
Plugin.register_plugin(SimplismartPlugin())

# Cleanup docs of unexported modules: every module-level name that is not
# listed in ``__all__`` is switched off in ``__pdoc__``.
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

# Built with a comprehension so that no loop variable is left behind as a
# stray attribute on the package.
__pdoc__ = {name: False for name in NOT_IN_ALL}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import logging

# Logger for the Simplismart plugin; sibling modules import it from here.
logger = logging.getLogger("livekit.plugins.simplismart")
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Copyright 2025 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Speech-to-Text implementation for SimpliSmart

This module provides an STT implementation that uses the SimpliSmart API.
"""

import asyncio
import base64
import os
from typing import Any, Literal

import aiohttp
from pydantic import BaseModel

from livekit.agents import (
APIConnectionError,
APIConnectOptions,
APIStatusError,
APITimeoutError,
stt,
utils,
)
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
from livekit.agents.utils import AudioBuffer, rtc

from .log import logger


class SimplismartSTTOptions(BaseModel):
    """Request options for the Simplismart speech-to-text endpoint.

    ``STT._recognize_impl`` serializes this model with ``model_dump()`` and
    sends every field in the JSON request body, so the field names below are
    the wire names. ``language`` and ``model`` are overwritten per request by
    the values the ``STT`` instance resolves.

    NOTE(review): the field names look like Whisper-style decoding and VAD
    options; confirm exact semantics and valid ranges against the Simplismart
    API documentation.
    """

    # Language hint; ``None`` leaves it unspecified.
    language: str | None = None
    task: Literal["transcribe", "translate"] = "transcribe"
    without_timestamps: bool = True

    # Voice-activity-detection selection.
    vad_model: Literal["silero", "frame"] = "frame"
    vad_filter: bool = True

    model: str | None = "openai/whisper-large-v3-turbo"
    word_timestamps: bool = False

    # Voice-activity-detection tuning (units as encoded in the field names).
    vad_onset: float | None = 0.5
    vad_offset: float | None = None
    min_speech_duration_ms: int = 0
    max_speech_duration_s: float = 30
    min_silence_duration_ms: int = 2000
    speech_pad_ms: int = 400

    diarization: bool = False
    initial_prompt: str | None = None
    hotwords: str | None = None
    num_speakers: int = 0

    # Decoding parameters.
    compression_ratio_threshold: float | None = 2.4
    beam_size: int = 4
    temperature: float = 0.0
    multilingual: bool = False
    max_tokens: float | None = 400
    log_prob_threshold: float | None = -1.0
    length_penalty: int = 1
    repetition_penalty: float = 1.01
    suppress_tokens: list[int] = [-1]
    strict_hallucination_reduction: bool = False


class STT(stt.STT):
    """Non-streaming speech-to-text client for a Simplismart HTTP endpoint.

    Each recognition call posts the whole audio buffer (WAV, base64-encoded)
    together with the configured options as one JSON document and converts the
    response into a single final-transcript event.
    """

    def __init__(
        self,
        *,
        base_url: str,
        api_key: str | None = None,
        model: str | None = None,
        params: dict[str, Any] | SimplismartSTTOptions | None = None,
        http_session: aiohttp.ClientSession | None = None,
    ):
        """Create the STT client.

        Args:
            base_url: URL the transcription request is POSTed to.
            api_key: API key; falls back to the ``SIMPLISMART_API_KEY``
                environment variable when omitted.
            model: Model identifier sent with each request. When omitted, the
                ``model`` field of the resolved options is used.
            params: Request options, either a ``SimplismartSTTOptions``
                instance or a dict of its fields. Defaults apply when omitted.
            http_session: aiohttp session to reuse. When omitted, a session is
                obtained from ``utils.http_context`` on first use.

        Raises:
            ValueError: If no API key is given and the environment variable
                is not set.
        """
        super().__init__(
            capabilities=stt.STTCapabilities(
                streaming=False,
                interim_results=False,
                aligned_transcript="word",
            )
        )

        self._api_key = api_key or os.environ.get("SIMPLISMART_API_KEY")
        if not self._api_key:
            raise ValueError("SIMPLISMART_API_KEY is not set")

        if params is None:
            self._opts = SimplismartSTTOptions()
        elif isinstance(params, SimplismartSTTOptions):
            self._opts = params
        else:
            self._opts = SimplismartSTTOptions(**params)

        # ``_model`` must exist for every ``params`` shape (it used to be left
        # unset for dict params), and an explicit ``model`` argument wins over
        # the one carried by the options.
        self._model = model if model is not None else self._opts.model

        self._base_url = base_url
        self._logger = logger.getChild(self.__class__.__name__)
        self._session = http_session

    @property
    def provider(self) -> str:
        """Provider name reported for this STT implementation."""
        return "Simplismart"

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching the shared one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def _parse_response(
        self, response_json: dict[str, Any], language: str | None
    ) -> stt.SpeechEvent:
        """Convert a decoded API response into a final-transcript event."""
        # Prefer the language reported by the service; fall back to the one
        # that was requested when the response does not carry it.
        info = response_json.get("info") or {}
        detected_language = info.get("language") or language or ""

        # The response may carry no segment timestamps; use 0.0 instead of
        # failing with an IndexError on the empty list.
        timestamps = response_json.get("timestamps") or []
        start_time = timestamps[0][0] if timestamps else 0.0
        end_time = timestamps[-1][1] if timestamps else 0.0

        return stt.SpeechEvent(
            type=stt.SpeechEventType.FINAL_TRANSCRIPT,
            request_id=response_json.get("request_id", ""),
            alternatives=[
                stt.SpeechData(
                    language=detected_language,
                    text="".join(response_json["transcription"]),
                    start_time=start_time,
                    end_time=end_time,
                    # No confidence is read from the response; fixed value.
                    confidence=1.0,
                ),
            ],
        )

    async def _recognize_impl(
        self,
        buffer: AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> stt.SpeechEvent:
        """Transcribe ``buffer`` with one HTTP request.

        Args:
            buffer: Audio frames to transcribe; they are combined and encoded
                as WAV before upload.
            language: Per-call language override; the configured option is
                used when not given.
            conn_options: Connection options; ``timeout`` bounds the request.

        Returns:
            A ``FINAL_TRANSCRIPT`` speech event with a single alternative.

        Raises:
            APIStatusError: The service answered with a non-200 status.
            APITimeoutError: The request timed out.
            APIConnectionError: Any other transport or processing failure.
        """
        language = self._opts.language if isinstance(language, type(NOT_GIVEN)) else language
        wav_bytes = rtc.combine_audio_frames(buffer).to_wav_bytes()

        payload = self._opts.model_dump()
        payload["audio_data"] = base64.b64encode(wav_bytes).decode("utf-8")
        payload["language"] = language
        payload["model"] = self._model

        try:
            async with self._ensure_session().post(
                self._base_url,
                json=payload,
                headers={
                    "Authorization": f"Bearer {self._api_key}",
                    "Content-Type": "application/json",
                },
                # Honour the caller's connection options, as the TTS path does.
                timeout=aiohttp.ClientTimeout(
                    total=conn_options.timeout,
                    sock_connect=conn_options.timeout,
                ),
            ) as res:
                if res.status != 200:
                    error_text = await res.text()
                    self._logger.error(f"Simplismart API error: {res.status} - {error_text}")
                    raise APIStatusError(
                        message=f"Simplismart API Error: {error_text}",
                        status_code=res.status,
                    )

                response_json = await res.json()

            return self._parse_response(response_json, language)
        except APIStatusError:
            # Already logged above. Re-raise unchanged so the status code is
            # not lost by the generic handler below.
            raise
        except asyncio.TimeoutError as e:
            self._logger.error(f"Simplismart API timeout: {e}")
            raise APITimeoutError("Simplismart API request timed out") from e
        except aiohttp.ClientError as e:
            self._logger.error(f"Simplismart API client error: {e}")
            raise APIConnectionError(f"Simplismart API connection error: {e}") from e
        except Exception as e:
            self._logger.error(f"Error during Simplismart STT processing: {e}")
            raise APIConnectionError(f"Unexpected error in Simplismart STT: {e}") from e
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import asyncio
import os
from typing import Any

import aiohttp

# from .log import logger
from pydantic import BaseModel

from livekit.agents import (
DEFAULT_API_CONNECT_OPTIONS,
APIConnectionError,
APIConnectOptions,
APIStatusError,
APITimeoutError,
tts,
utils,
)


class SimplismartTTSOptions(BaseModel):
    """Request options for the Simplismart text-to-speech endpoint.

    ``ChunkedStream._run`` serializes this model with ``model_dump()`` and
    sends every field in the JSON request body, so the field names are the
    wire names.

    NOTE(review): these look like generation/sampling controls; confirm the
    accepted ranges against the Simplismart API documentation.
    """

    temperature: float = 0.7
    top_p: float = 0.9
    repetition_penalty: float = 1.5
    max_tokens: int = 1000


class TTS(tts.TTS):
    """Non-streaming text-to-speech client for a Simplismart HTTP endpoint.

    The instance declares 24 kHz mono output and creates one
    ``ChunkedStream`` per ``synthesize`` call.
    """

    # Model used when the caller passes ``model=None``.
    _DEFAULT_MODEL = "Simplismart/orpheus-3b-0.1-ft"

    def __init__(
        self,
        *,
        base_url: str,
        model: str | None = "Simplismart/orpheus-3b-0.1-ft",
        voice: str = "tara",
        api_key: str | None = None,
        params: dict[str, Any] | SimplismartTTSOptions | None = None,
        http_session: aiohttp.ClientSession | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the TTS client.

        Args:
            base_url: URL the synthesis request is POSTed to.
            model: Model identifier; ``None`` selects the default model.
            voice: Voice name sent with each request.
            api_key: API key; falls back to the ``SIMPLISMART_API_KEY``
                environment variable when omitted.
            params: Request options, either a ``SimplismartTTSOptions``
                instance or a dict of its fields. Defaults apply when omitted.
            http_session: aiohttp session to reuse. When omitted, a session is
                obtained from ``utils.http_context`` on first use.
            **kwargs: Accepted for call compatibility and otherwise ignored.

        Raises:
            ValueError: If no API key is given and the environment variable
                is not set.
        """
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=24000,
            num_channels=1,
        )

        self._base_url = base_url
        # Normalise ``None`` so the ``model`` property always returns a string,
        # as its annotation promises.
        self._model = model if model is not None else self._DEFAULT_MODEL
        self._voice = voice
        self._api_key = api_key or os.environ.get("SIMPLISMART_API_KEY")
        if not self._api_key:
            raise ValueError("SIMPLISMART_API_KEY is not set")

        self._session = http_session

        if params is None:
            self._opts = SimplismartTTSOptions()
        elif isinstance(params, SimplismartTTSOptions):
            self._opts = params
        else:
            self._opts = SimplismartTTSOptions(**params)

    @property
    def model(self) -> str:
        """Model identifier configured for this instance."""
        return self._model

    @property
    def provider(self) -> str:
        """Provider name reported for this TTS implementation."""
        return "SimpliSmart"

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching the shared one on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> "ChunkedStream":
        """Return a ``ChunkedStream`` that synthesizes ``text``."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)


class ChunkedStream(tts.ChunkedStream):
    """One synthesis request whose response body is forwarded as PCM audio."""

    # Sent when the owning TTS instance has no model configured.
    _FALLBACK_MODEL = "Simplismart/orpheus-3b-0.1-ft"

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        """Bind the stream to its TTS instance and capture its options and model."""
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts
        self._opts = tts._opts
        self._model = tts._model

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """POST the synthesis request and push the response chunks to the emitter.

        Args:
            output_emitter: Emitter that receives the raw audio chunks.

        Raises:
            APITimeoutError: The request timed out.
            APIStatusError: The service answered with an error status.
            APIConnectionError: Any other failure while requesting or streaming.
        """
        payload = self._opts.model_dump()
        payload["prompt"] = self._input_text
        payload["voice"] = self._tts._voice
        # Send the model chosen on the TTS instance; it used to be hard-coded
        # here, which silently ignored the caller's choice.
        payload["model"] = self._model if self._model is not None else self._FALLBACK_MODEL

        headers = {
            "Authorization": f"Bearer {self._tts._api_key}",
            "Content-Type": "application/json",
        }

        try:
            async with self._tts._ensure_session().post(
                self._tts._base_url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=self._conn_options.timeout,
                    sock_connect=self._conn_options.timeout,
                ),
            ) as resp:
                # Raises aiohttp.ClientResponseError, mapped to APIStatusError below.
                resp.raise_for_status()
                output_emitter.initialize(
                    request_id=utils.shortuuid(),
                    sample_rate=self._tts.sample_rate,
                    num_channels=self._tts.num_channels,
                    mime_type="audio/pcm",
                )
                async for audio_data, _ in resp.content.iter_chunks():
                    output_emitter.push(audio_data)
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except aiohttp.ClientResponseError as e:
            raise APIStatusError(
                message=e.message, status_code=e.status, request_id=None, body=None
            ) from None
        except Exception as e:
            raise APIConnectionError() from e
Loading
Loading