|
| 1 | +import os |
| 2 | +import json |
| 3 | +import asyncio |
| 4 | +import aiohttp |
| 5 | + |
| 6 | +import pyaudio |
| 7 | + |
# Credentials are read from the environment when the module is imported;
# a missing DEEPGRAM_API_KEY raises KeyError right here.
api_key = os.environ["DEEPGRAM_API_KEY"]
headers = {"Authorization": "Token " + api_key}

# Deepgram text-to-speech WebSocket endpoint
DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/speak"

# Sample passage to synthesize (surrounding blank lines are stripped)
TEXT: str = """
The sun had just begun to rise over the sleepy town of Millfield.
Emily a young woman in her mid-twenties was already awake and bustling about.
""".strip()

# Playback parameters shared by the audio output and the request URL
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1
SAMPLE_RATE = 48_000
CHUNK_SIZE = 8_000
| 28 | + |
| 29 | + |
class AsyncSpeaker:
    """Queue-fed audio output on top of a PyAudio stream.

    Intended call sequence (as used by the caller in this file): ``start()``,
    run ``_play_audio()`` as an asyncio task, feed buffers with ``play()``,
    and finally call ``stop()``.
    """

    def __init__(
        self,
        rate: int = SAMPLE_RATE,
        chunk_size: int = CHUNK_SIZE,
        channels: int = CHANNELS,
        output_device_index: int = None,
    ):
        """Store the stream settings; no device is opened until ``start()``.

        Args:
            rate: Sample rate passed to the output stream.
            chunk_size: ``frames_per_buffer`` passed to the output stream.
            channels: Channel count passed to the output stream.
            output_device_index: Forwarded unchanged to ``PyAudio.open``;
                ``None`` leaves the device choice to PyAudio.
        """
        import threading  # local import: only needed for the write lock below

        self._audio = pyaudio.PyAudio()
        self._chunk = chunk_size
        self._rate = rate
        self._format = FORMAT
        self._channels = channels
        self._output_device_index = output_device_index
        self._stream = None
        self._audio_queue = asyncio.Queue()
        self._is_playing = False
        # Writes run in a worker thread (see _play_audio); this lock keeps
        # stop() from closing the stream while a write is in progress.
        self._stream_lock = threading.Lock()

    def start(self) -> bool:
        """Open and start the output stream and mark the speaker as playing.

        Returns:
            Always ``True``; failures surface as exceptions from PyAudio.
        """
        self._stream = self._audio.open(
            format=self._format,
            channels=self._channels,
            rate=self._rate,
            input=False,
            output=True,
            frames_per_buffer=self._chunk,
            output_device_index=self._output_device_index,
        )
        self._stream.start_stream()
        self._is_playing = True
        return True

    def stop(self):
        """Stop playback and close the output stream, if one is open.

        Safe to call more than once.  The PyAudio instance itself is kept, so
        ``start()`` can be called again afterwards.
        """
        self._is_playing = False
        # Taking the lock waits for any in-flight write to finish first.
        with self._stream_lock:
            if self._stream is not None:
                self._stream.stop_stream()
                self._stream.close()
                self._stream = None

    async def play(self, data):
        """Queue one buffer of audio bytes for playback."""
        await self._audio_queue.put(data)

    def _write_blocking(self, data):
        """Write one buffer to the stream; a no-op once the stream is closed."""
        with self._stream_lock:
            if self._stream is not None:
                self._stream.write(data)

    async def _play_audio(self):
        """Drain the queue into the output stream until stopped.

        Runs until ``stop()`` clears the playing flag or a write fails.  The
        blocking ``stream.write`` call is run in the default executor so the
        event loop stays free for other tasks while audio is being written.
        """
        loop = asyncio.get_running_loop()
        while self._is_playing:
            try:
                # Short timeout so the loop re-checks _is_playing regularly.
                data = await asyncio.wait_for(self._audio_queue.get(), timeout=0.050)
            except asyncio.TimeoutError:
                continue
            try:
                await loop.run_in_executor(None, self._write_blocking, data)
            except Exception as e:
                print(f"_play_audio error: {e}")
                break
            finally:
                # Always account for the item that was taken off the queue,
                # even on failure, so the unfinished-task count stays correct.
                self._audio_queue.task_done()
| 83 | + |
| 84 | + |
def chunk_text(text: str, words_per_chunk: int):
    """Yield *text* in consecutive pieces of at most *words_per_chunk* words.

    Words are whatever ``str.split()`` produces, so runs of whitespace
    collapse and every piece is re-joined with single spaces.  The last piece
    may be shorter; empty or whitespace-only text yields nothing.
    """
    tokens = text.split()
    starts = range(0, len(tokens), words_per_chunk)
    yield from (
        " ".join(tokens[start : start + words_per_chunk]) for start in starts
    )
| 89 | + |
| 90 | + |
async def stream_text_to_websocket():
    """Send TEXT to Deepgram's speak WebSocket and play the audio it returns.

    Two coroutines share one connection and run concurrently:

    * ``send_text_stream`` sends TEXT three words at a time as ``Speak``
      messages, then ``Flush`` and ``Close``, and closes the socket once the
      receiver has stopped reading.
    * ``receive_audio_stream`` queues every binary message on an
      ``AsyncSpeaker`` and stops when the connection closes or fails.

    Errors raised while receiving are printed, not propagated; errors raised
    while connecting or sending propagate to the caller.
    """
    speaker = AsyncSpeaker()
    async with aiohttp.ClientSession() as session:
        url = f"{DEEPGRAM_WS_URL}?encoding=linear16&sample_rate={SAMPLE_RATE}"
        async with session.ws_connect(url, headers=headers) as ws:
            print("WebSocket connection established.")
            # Set once the receiver stops reading for ANY reason (close frame,
            # dropped connection, or error) so the sender can never wait forever.
            receiver_done = asyncio.Event()

            async def send_text_stream():
                for a_few_words in chunk_text(TEXT, 3):
                    await asyncio.sleep(0.5)  # pause between sending text
                    if receiver_done.is_set():
                        # The receiving side has already ended; stop sending.
                        break
                    print(f"Sending: {a_few_words}")
                    await ws.send_str(
                        json.dumps({"type": "Speak", "text": a_few_words})
                    )
                else:
                    # Only reached when every chunk was sent.
                    await ws.send_str(json.dumps({"type": "Flush"}))
                    await ws.send_str(json.dumps({"type": "Close"}))
                # Wait until the receiver has seen the end of the stream,
                # then close the websocket on this end
                await receiver_done.wait()
                await ws.close()
                print("WebSocket connection closed.")

            async def receive_audio_stream():
                speaker.start()
                audio_player = asyncio.create_task(speaker._play_audio())
                last_audio_duration = 0
                # Message types after which no further audio can arrive.
                closed_types = (
                    aiohttp.WSMsgType.CLOSE,
                    aiohttp.WSMsgType.CLOSING,
                    aiohttp.WSMsgType.CLOSED,
                )
                try:
                    while True:
                        try:
                            message = await ws.receive(timeout=2)
                        except asyncio.TimeoutError:
                            continue
                        if message.type == aiohttp.WSMsgType.BINARY:
                            # linear16 audio: 2 bytes per sample per channel
                            last_audio_duration = len(message.data) / (
                                SAMPLE_RATE * CHANNELS * 2
                            )
                            await speaker.play(message.data)
                        elif message.type == aiohttp.WSMsgType.ERROR:
                            print(f"receiver error: {message.data!r}")
                            break
                        elif message.type in closed_types:
                            break

                    # Let the sender close its end while the audio finishes.
                    receiver_done.set()

                    # Wait for remaining audio to be sent to the player, but
                    # give up if the player task has already exited: the
                    # queue would then never drain and join() would hang.
                    drained = asyncio.create_task(speaker._audio_queue.join())
                    try:
                        await asyncio.wait(
                            {drained, audio_player},
                            return_when=asyncio.FIRST_COMPLETED,
                        )
                    finally:
                        drained.cancel()
                    # Wait for the last bit of audio to be played
                    await asyncio.sleep(last_audio_duration + 0.5)
                except Exception as e:
                    # repr() keeps the exception type and message visible.
                    print(f"receiver error: {e!r}")
                finally:
                    # Runs on success, error and cancellation alike.
                    receiver_done.set()
                    speaker.stop()
                    audio_player.cancel()

            await asyncio.gather(send_text_stream(), receive_audio_stream())
| 146 | + |
| 147 | + |
async def main():
    """Run the text-to-speech streaming demo once, start to finish."""
    demo = stream_text_to_websocket()
    await demo


if __name__ == "__main__":
    # Executed as a script: drive main() on a fresh event loop.
    asyncio.run(main())
0 commit comments