Skip to content

Commit

Permalink
speechmatics initial setup as model selection
Browse files Browse the repository at this point in the history
  • Loading branch information
josancamon19 committed Sep 19, 2024
1 parent 59072b1 commit 76d0eb7
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 4 deletions.
4 changes: 4 additions & 0 deletions app/lib/backend/http/api/memories.dart
Original file line number Diff line number Diff line change
Expand Up @@ -188,18 +188,22 @@ class TranscriptsResponse {
List<TranscriptSegment> deepgram;
List<TranscriptSegment> soniox;
List<TranscriptSegment> whisperx;
List<TranscriptSegment> speechmatics;

/// Creates a response holding one transcript-segment list per STT provider.
///
/// Every provider list defaults to empty so a response missing a provider
/// (e.g. `speechmatics` on older backends) is still representable.
TranscriptsResponse({
this.deepgram = const [],
this.soniox = const [],
this.whisperx = const [],
this.speechmatics = const [],
});

/// Parses a [TranscriptsResponse] from the backend JSON payload.
///
/// A missing or `null` provider key yields an empty segment list instead of
/// throwing a cast error, so responses from backends that do not yet emit
/// `speechmatics` (or any other provider) still parse.
factory TranscriptsResponse.fromJson(Map<String, dynamic> json) {
  // One shared parser for all providers instead of four identical chains.
  List<TranscriptSegment> parse(String key) =>
      ((json[key] as List<dynamic>?) ?? []).map((segment) => TranscriptSegment.fromJson(segment)).toList();

  return TranscriptsResponse(
    deepgram: parse('deepgram'),
    soniox: parse('soniox'),
    whisperx: parse('whisperx'),
    speechmatics: parse('speechmatics'),
  );
}
}
Expand Down
20 changes: 20 additions & 0 deletions app/lib/backend/schema/transcript_segment.dart
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,26 @@ class TranscriptSegment {
cleanSegments(joinedSimilarSegments);

segments.addAll(joinedSimilarSegments);

// for i, segment in enumerate(segments):
// segments[i].text = (
// segments[i].text.strip()
// .replace(' ', '')
// .replace(' ,', ',')
// .replace(' .', '.')
// .replace(' ?', '?')
// )

// Speechmatics specific issue with punctuation
for (var i = 0; i < segments.length; i++) {
segments[i].text = segments[i]
.text
.replaceAll(' ', '')
.replaceAll(' ,', ',')
.replaceAll(' .', '.')
.replaceAll(' ?', '?')
.trim();
}
}

static String segmentsAsString(
Expand Down
21 changes: 19 additions & 2 deletions app/lib/pages/memory_detail/compare_transcripts.dart
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class _CompareTranscriptsPageState extends State<CompareTranscriptsPage> {
backgroundColor: Theme.of(context).colorScheme.primary,
),
body: DefaultTabController(
length: 3,
length: 4,
initialIndex: 0,
child: Column(
children: [
Expand All @@ -50,7 +50,12 @@ class _CompareTranscriptsPageState extends State<CompareTranscriptsPage> {
padding: EdgeInsets.zero,
indicatorPadding: EdgeInsets.zero,
labelStyle: Theme.of(context).textTheme.titleLarge!.copyWith(fontSize: 18),
tabs: const [Tab(text: 'Deepgram'), Tab(text: 'Soniox'), Tab(text: 'Whisper-x')],
tabs: const [
Tab(text: 'Deepgram'),
Tab(text: 'Soniox'),
Tab(text: 'SpeechMatics'),
Tab(text: 'Whisper-x'),
],
indicator: BoxDecoration(color: Colors.transparent, borderRadius: BorderRadius.circular(16)),
),
Expanded(
Expand Down Expand Up @@ -84,6 +89,18 @@ class _CompareTranscriptsPageState extends State<CompareTranscriptsPage> {
)
],
),
ListView(
shrinkWrap: true,
children: [
TranscriptWidget(
segments: transcripts?.speechmatics ?? [],
horizontalMargin: false,
topMargin: false,
canDisplaySeconds: true,
isMemoryDetail: true,
)
],
),
ListView(
shrinkWrap: true,
children: [
Expand Down
2 changes: 1 addition & 1 deletion app/lib/pages/settings/developer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class __DeveloperSettingsPageState extends State<_DeveloperSettingsPage> {
underline: Container(height: 0, color: Colors.white),
isExpanded: true,
itemHeight: 48,
items: ['deepgram', 'soniox'].map<DropdownMenuItem<String>>((String value) {
items: ['deepgram', 'soniox', 'speechmatics'].map<DropdownMenuItem<String>>((String value) {
return DropdownMenuItem<String>(
value: value,
child: Text(
Expand Down
119 changes: 118 additions & 1 deletion backend/utils/stt/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ async def on_message():
segments[i]['text'] = segments[i]['text'].strip().replace(' ', '')

# print('Soniox:', transcript.replace('<end>', ''))
stream_transcript(segments, stream_id)
if segments:
stream_transcript(segments, stream_id)
except websockets.exceptions.ConnectionClosedOK:
print("Soniox connection closed normally.")
except Exception as e:
Expand All @@ -276,3 +277,119 @@ async def on_message():
except Exception as e:
print(f"Exception in process_audio_soniox: {e}")
raise # Re-raise the exception to be handled by the caller


# Default transcription language for Speechmatics real-time STT.
LANGUAGE = "en"
# Speechmatics EU real-time endpoint (plain literal; the f-string had no placeholders).
CONNECTION_URL = "wss://eu2.rt.speechmatics.com/v2"


def speechmatics_results_to_segments(results: list) -> list:
    """Converts Speechmatics `AddTranscript` results into segment dicts.

    Consecutive tokens from the same speaker are merged into one segment;
    tokens with confidence below 0.4 are dropped. Returns a list of dicts
    with keys: speaker, start, end, text, is_user, person_id.
    """
    segments = []
    for r in results:
        if not r['alternatives']:
            continue
        r_data = r['alternatives'][0]
        r_start = r['start_time']
        r_end = r['end_time']

        r_content = r_data['content']
        if r_data['confidence'] < 0.4:
            print('Low confidence:', r)
            continue
        # 'UU' means "unknown speaker"; otherwise labels look like 'S1', 'S2', ...
        r_speaker = r_data['speaker'][1:] if r_data['speaker'] != 'UU' else '1'
        # NOTE(review): assumes single-digit speaker ids (max_speakers is 4 below).
        speaker = f"SPEAKER_0{r_speaker}"
        if segments and segments[-1]['speaker'] == speaker:
            last_segment = segments[-1]
            # Tokens are joined with a leading space (punctuation tokens included);
            # the client strips space-before-punctuation afterwards.
            last_segment['text'] += f' {r_content}'
            # Extend the segment to the latest token's end time.
            # (Was `+= r_end`, which accumulated end times and inflated segment bounds.)
            last_segment['end'] = r_end
        else:
            segments.append({
                'speaker': speaker,
                'start': r_start,
                'end': r_end,
                'text': r_content,
                'is_user': False,
                'person_id': None,
            })
    return segments


async def process_audio_speechmatics(stream_transcript, stream_id: int, language: str, uid: str):
    """Opens a Speechmatics real-time STT websocket and streams transcript segments.

    Connects to the Speechmatics RT API, sends the recognition configuration,
    and spawns a background task that turns `AddTranscript` messages into
    segment dicts forwarded to `stream_transcript(segments, stream_id)`.

    Args:
        stream_transcript: callback invoked with (segments, stream_id).
        stream_id: opaque id passed through to the callback.
        language: transcription language code (e.g. 'en').
        uid: user id; currently unused (speech-profile support is disabled).

    Returns:
        The connected websocket, so the caller can feed it raw PCM-s16le
        16 kHz audio frames.

    Raises:
        Whatever `websockets.connect` raises when the connection fails.
    """
    api_key = os.getenv('SPEECHMATICS_API_KEY')
    # has_speech_profile = create_user_speech_profile(uid)  # only english too

    request = {
        "message": "StartRecognition",
        "transcription_config": {
            "language": language,
            "diarization": "speaker",
            "operating_point": "enhanced",
            "max_delay_mode": "flexible",
            "max_delay": 3,
            "enable_partials": False,
            "enable_entities": True,
            "speaker_diarization_config": {"max_speakers": 4}
        },
        "audio_format": {"type": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
        # "audio_events_config": {"types": ["laughter", "music", "applause"]}
    }
    try:
        print("Connecting to Speechmatics WebSocket...")
        socket = await websockets.connect(CONNECTION_URL, extra_headers={"Authorization": f"Bearer {api_key}"})
        print("Connected to Speechmatics WebSocket.")

        # Send the initial StartRecognition request.
        await socket.send(json.dumps(request))
        print(f"Sent initial request: {request}")

        # Background task: listen for messages from Speechmatics.
        async def on_message():
            try:
                async for message in socket:
                    response = json.loads(message)
                    if response['message'] == 'AudioAdded':
                        continue
                    if response['message'] == 'AddTranscript':
                        segments = speechmatics_results_to_segments(response['results'])
                        if segments:
                            stream_transcript(segments, stream_id)
                    else:
                        # Unexpected message type; log it for debugging.
                        print(response)
            except websockets.exceptions.ConnectionClosedOK:
                print("Speechmatics connection closed normally.")
            except Exception as e:
                print(f"Error receiving from Speechmatics: {e}")
            finally:
                if not socket.closed:
                    await socket.close()
                    print("Speechmatics WebSocket closed in on_message.")

        asyncio.create_task(on_message())
        return socket
    except Exception as e:
        print(f"Exception in process_audio_speechmatics: {e}")
        raise

0 comments on commit 76d0eb7

Please sign in to comment.