PromtEngineer · manishEMS47 · Jun 8, 2026
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ Welcome to the Voice Assistant project! 🎙️ Our goal is to create a modular
 ## Features 🧰
 
 - **Modular Design**: Easily switch between different models for transcription, response generation, and TTS.
-- **Support for Multiple APIs**: Integrates with OpenAI, Groq, and Deepgram APIs, along with placeholders for local models.
+- **Support for Multiple APIs**: Integrates with OpenAI, Groq, Deepgram, ElevenLabs, and 60db.ai APIs, along with placeholders for local models.
 - **Audio Recording and Playback**: Record audio from the microphone and play generated speech.
 - **Configuration Management**: Centralized configuration in `config.py` for easy setup and management.
 
@@ -84,6 +84,9 @@ Create a  `.env` file in the root directory and add your API keys:
     OPENAI_API_KEY=your_openai_api_key
     GROQ_API_KEY=your_groq_api_key
     DEEPGRAM_API_KEY=your_deepgram_api_key
+    ELEVENLABS_API_KEY=your_elevenlabs_api_key
+    SIXTYDB_API_KEY=your_60db_api_key
+    SIXTYDB_VOICE_ID=fbb75ed2-975a-40c7-9e06-38e30524a9a1
     LOCAL_MODEL_PATH=path/to/local/model
     PIPER_SERVER_URL=server_url
 ```
@@ -96,12 +99,14 @@ Edit config.py to select the models you want to use:
         # Model selection
         TRANSCRIPTION_MODEL = 'groq'  # Options: 'openai', 'groq', 'deepgram', 'fastwhisperapi' 'local'
         RESPONSE_MODEL = 'groq'       # Options: 'openai', 'groq', 'ollama', 'local'
-        TTS_MODEL = 'deepgram'        # Options: 'openai', 'deepgram', 'elevenlabs', 'local', 'melotts', 'piper'
+        TTS_MODEL = 'deepgram'        # Options: 'openai', 'deepgram', 'elevenlabs', 'sixtydb', 'cartesia', 'local', 'melotts', 'piper'
 
         # API keys and paths
         OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
         GROQ_API_KEY = os.getenv("GROQ_API_KEY")
         DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
+        ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
+        SIXTYDB_API_KEY = os.getenv("SIXTYDB_API_KEY")
         LOCAL_MODEL_PATH = os.getenv("LOCAL_MODEL_PATH")
 ```
 
@@ -112,12 +117,19 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
 - Follow this [link](https://elevenlabs.io/app/voice-lab/share/de3746fa51a09e771604d74b5d1ff6797b6b96a5958f9de95cef544dde31dad9/WArWzu0z4mbSyy5BfRKM) to add the Jarvis voice to your ElevenLabs account.
 - Name the voice 'Paul J.' or, if you prefer a different name, ensure it matches the ELEVENLABS_VOICE_ID variable in the text_to_speech.py file.
 
-7. 🏃 **Run the voice assistant**
+7. 🔊 **Configure 60db.ai TTS**
+- Sign up at [60db.ai](https://60db.ai) and get your API key from the dashboard.
+- Add `SIXTYDB_API_KEY` to your `.env` file.
+- Optionally, set `SIXTYDB_VOICE_ID` in your `.env` to use a custom voice (if unset, the default voice ID defined in `config.py` is used).
+- You can browse your available voices via the [Voices API](https://docs.60db.ai/api-reference/voices/get-my-voices).
+- Set `TTS_MODEL = 'sixtydb'` in `config.py` to use 60db.ai.
+
+8. 🏃 **Run the voice assistant**
 
 ```shell
    python run_voice_assistant.py
 ```
-8. 🎤 **Install FastWhisperAPI**
+9. 🎤 **Install FastWhisperAPI**
 
    _Optional step if you need a local transcription model_
 
@@ -151,7 +163,7 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
    ```
    Refer to the repository documentation for the Google Colab method: https://github.com/3choff/FastWhisperAPI/blob/main/README.md
 
-8. 🎤 **Install Local TTS - MeloTTS**
+10. 🎤 **Install Local TTS - MeloTTS**
 
    _Optional step if you need a local Text to Speech model_
 
@@ -167,10 +179,10 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
    In order to use the local TTS model, you will need to update the `config.py` file by setting: 
 
    ```shell
-      TTS_MODEL = 'melotts'        # Options: 'openai', 'deepgram', 'elevenlabs', 'local', 'melotts', 'piper'
+      TTS_MODEL = 'melotts'        # Options: 'openai', 'deepgram', 'elevenlabs', 'sixtydb', 'cartesia', 'local', 'melotts', 'piper'
    ```
 
-9. 🎤 **Install Local TTS - Piper**
+11. 🎤 **Install Local TTS - Piper**
 
    _A faster and lightweight alternative to MeloTTS_
 
@@ -206,7 +218,7 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
    In order to use the local TTS model, you will need to update the `config.py` file by setting: 
 
    ```shell
-      TTS_MODEL = 'piper'        # Options: 'openai', 'deepgram', 'elevenlabs', 'local', 'melotts','piper'
+      TTS_MODEL = 'piper'        # Options: 'openai', 'deepgram', 'elevenlabs', 'sixtydb', 'cartesia', 'local', 'melotts', 'piper'
    ```
    You can run the main file to start using verbi with local models. 
 
@@ -232,6 +244,10 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
 - **OpenAI**: Uses OpenAI's TTS model with the 'fable' voice.
 - **Deepgram**: Uses Deepgram's TTS model with the 'aura-angus-en' voice.
 - **ElevenLabs**: Uses ElevenLabs' TTS model with the 'Paul J.' voice.
+- **60db.ai**: Uses 60db.ai's TTS API with the default voice (configurable via `SIXTYDB_VOICE_ID`). Supports multiple languages including English and Indic languages (Hindi, Bengali, Tamil, etc.).
+- **Cartesia**: Uses Cartesia's Sonic English model with real-time streaming playback.
+- **MeloTTS**: Uses the local MeloTTS model (requires local setup).
+- **Piper**: Uses the local Piper model — a fast and lightweight alternative (requires local setup).
 - **Local**: Placeholder for a local TTS model.
 
 ## Detailed Module Descriptions  📘
@@ -253,7 +269,7 @@ If you are running LLM locally via [Ollama](https://ollama.com/), make sure the
 Here's what's next for the Voice Assistant project:
 
 1. **Add Support for Streaming**: Enable real-time streaming of audio input and output.
-2. **Add Support for ElevenLabs and Enhanced Deepgram for TTS**: Integrate additional TTS options for higher quality and variety.
+2. ~~**Add Support for ElevenLabs and Enhanced Deepgram for TTS**~~: Integrated ElevenLabs, 60db.ai, and Cartesia TTS options. ✅
 3. **Add Filler Audios**: Include background or filler audios while waiting for model responses to enhance user experience.
 4. **Add Support for Local Models Across the Board**: Expand support for local models in transcription, response generation, and TTS.
 

diff --git a/example.env b/example.env
@@ -2,6 +2,8 @@ OPENAI_API_KEY="OPENAI_API_KEY"
 GROQ_API_KEY="GROQ_API_KEY"
 DEEPGRAM_API_KEY="DEEPGRAM_API_KEY"
 ELEVENLABS_API_KEY="ELEVENLABS_API_KEY"
+SIXTYDB_API_KEY="SIXTYDB_API_KEY"
+SIXTYDB_VOICE_ID="fbb75ed2-975a-40c7-9e06-38e30524a9a1"
 CARTESIA_API_KEY="CARTESIA_API_KEY"
 LOCAL_MODEL_PATH=path/to/local/model
 PIPER_SERVER_URL=http://localhost:5000
diff --git a/run_voice_assistant.py b/run_voice_assistant.py
@@ -65,7 +65,7 @@ def main():
             chat_history.append({"role": "assistant", "content": response_text})
 
             # Determine the output file format based on the TTS model
-            if Config.TTS_MODEL == 'openai' or Config.TTS_MODEL == 'elevenlabs' or Config.TTS_MODEL == 'melotts' or Config.TTS_MODEL == 'cartesia':
+            if Config.TTS_MODEL == 'openai' or Config.TTS_MODEL == 'elevenlabs' or Config.TTS_MODEL == 'sixtydb' or Config.TTS_MODEL == 'melotts' or Config.TTS_MODEL == 'cartesia':
                 output_file = 'output.mp3'
             else:
                 output_file = 'output.wav'

diff --git a/voice_assistant/api_key_manager.py b/voice_assistant/api_key_manager.py
@@ -15,7 +15,8 @@
     "tts": {
         "openai": Config.OPENAI_API_KEY,
         "deepgram":Config.DEEPGRAM_API_KEY,
-        "elevenlabs": Config.ELEVENLABS_API_KEY
+        "elevenlabs": Config.ELEVENLABS_API_KEY,
+        "sixtydb": Config.SIXTYDB_API_KEY
     }
 }
 

diff --git a/voice_assistant/config.py b/voice_assistant/config.py
@@ -13,17 +13,19 @@ class Config:
     Attributes:
         TRANSCRIPTION_MODEL (str): The model to use for transcription ('openai', 'groq', 'deepgram', 'fastwhisperapi', 'local').
         RESPONSE_MODEL (str): The model to use for response generation ('openai', 'groq', 'local').
-        TTS_MODEL (str): The model to use for text-to-speech ('openai', 'deepgram', 'elevenlabs', 'local').
+        TTS_MODEL (str): The model to use for text-to-speech ('openai', 'deepgram', 'elevenlabs', 'sixtydb', 'melotts', 'cartesia', 'piper', 'local').
         OPENAI_API_KEY (str): API key for OpenAI services.
         GROQ_API_KEY (str): API key for Groq services.
         DEEPGRAM_API_KEY (str): API key for Deepgram services.
         ELEVENLABS_API_KEY (str): API key for ElevenLabs services.
+        SIXTYDB_API_KEY (str): API key for 60db.ai services.
+        SIXTYDB_VOICE_ID (str): Voice ID for 60db.ai TTS.
         LOCAL_MODEL_PATH (str): Path to the local model.
     """
     # Model selection
     TRANSCRIPTION_MODEL = 'deepgram'  # possible values: openai, groq, deepgram, fastwhisperapi
     RESPONSE_MODEL = 'openai'  # possible values: openai, groq, ollama
-    TTS_MODEL = 'openai'  # possible values: openai, deepgram, elevenlabs, melotts, cartesia, piper
+    TTS_MODEL = 'openai'  # possible values: openai, deepgram, elevenlabs, sixtydb, melotts, cartesia, piper
 
     # Piper Server configuration
     PIPER_SERVER_URL = os.getenv("PIPER_SERVER_URL")
@@ -42,6 +44,8 @@ class Config:
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
     DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
     ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
+    SIXTYDB_API_KEY = os.getenv("SIXTYDB_API_KEY")
+    SIXTYDB_VOICE_ID = os.getenv("SIXTYDB_VOICE_ID", "fbb75ed2-975a-40c7-9e06-38e30524a9a1")
     LOCAL_MODEL_PATH = os.getenv("LOCAL_MODEL_PATH")
     CARTESIA_API_KEY = os.getenv("CARTESIA_API_KEY")
 
@@ -64,7 +68,7 @@ def validate_config():
         Config._validate_model('RESPONSE_MODEL', [
             'openai', 'groq', 'ollama', 'local'])
         Config._validate_model('TTS_MODEL', [
-            'openai', 'deepgram', 'elevenlabs', 'melotts', 'cartesia', 'local', 'piper'])
+            'openai', 'deepgram', 'elevenlabs', 'sixtydb', 'melotts', 'cartesia', 'local', 'piper'])
 
         Config._validate_api_key('TRANSCRIPTION_MODEL', 'openai', 'OPENAI_API_KEY')
         Config._validate_api_key('TRANSCRIPTION_MODEL', 'groq', 'GROQ_API_KEY')
@@ -76,6 +80,7 @@ def validate_config():
         Config._validate_api_key('TTS_MODEL', 'openai', 'OPENAI_API_KEY')
         Config._validate_api_key('TTS_MODEL', 'deepgram', 'DEEPGRAM_API_KEY')
         Config._validate_api_key('TTS_MODEL', 'elevenlabs', 'ELEVENLABS_API_KEY')
+        Config._validate_api_key('TTS_MODEL', 'sixtydb', 'SIXTYDB_API_KEY')
         Config._validate_api_key('TTS_MODEL', 'cartesia', 'CARTESIA_API_KEY')
 
     @staticmethod

diff --git a/voice_assistant/text_to_speech.py b/voice_assistant/text_to_speech.py
@@ -1,6 +1,7 @@
 # voice_assistant/text_to_speech.py
 import logging
 import json
+import base64
 import pyaudio
 import elevenlabs
 import soundfile as sf
@@ -58,6 +59,34 @@ def text_to_speech(model: str, api_key:str, text:str, output_file_path:str, loca
                 model="eleven_turbo_v2"
             )
             elevenlabs.save(audio, output_file_path)
+
+        elif model == 'sixtydb':
+            response = requests.post(
+                "https://api.60db.ai/tts-synthesize",
+                headers={
+                    "Authorization": f"Bearer {api_key}",
+                    "Content-Type": "application/json"
+                },
+                json={
+                    "text": text,
+                    "voice_id": Config.SIXTYDB_VOICE_ID,
+                    "enhance": True,
+                    "speed": 1,
+                    "stability": 50,
+                    "similarity": 75,
+                    "output_format": "mp3"
+                }
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            if not data.get("success"):
+                raise ValueError(f"60db TTS failed: {data.get('message', 'Unknown error')}")
+
+            audio_bytes = base64.b64decode(data["audio_base64"])
+            with open(output_file_path, "wb") as f:
+                f.write(audio_bytes)
+            logging.info(f"60db TTS audio saved to {output_file_path} ({data.get('duration_seconds', '?')}s)")
 
         elif model == "cartesia":
             client = Cartesia(api_key=api_key)