-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspeech.py
119 lines (105 loc) · 4.17 KB
/
speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import asyncio
import pyaudio
import wave
import requests
import argparse
import signal
import sys
import os
# Hugging Face Inference API endpoint for the openai/whisper-large-v3 model.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "openai/whisper-large-v3"
)
def get_api_token():
    """Return the Hugging Face API token.

    The ``HUGGINGFACE_API_TOKEN`` environment variable is used when it is
    set; otherwise a notice is printed and the token is read from stdin.
    """
    env_token = os.environ.get('HUGGINGFACE_API_TOKEN')
    if env_token is not None:
        return env_token

    # No token in the environment: fall back to an interactive prompt.
    print("HUGGINGFACE_API_TOKEN environment variable is not set.")
    return input("Please enter your Hugging Face API token: ")
# Authorization header sent with every API request. The token comes from
# get_api_token(), which falls back to prompting when the environment
# variable is unset. (A second assignment that indexed os.environ directly
# was removed: it raised KeyError when the variable was missing and threw
# away the token the user had just typed.)
headers = {"Authorization": f"Bearer {get_api_token()}"}
class AsyncAudioRecorder:
    """Capture audio from a PyAudio input stream and save it as a WAV file.

    Typical use: create an instance, assign ``filename``, await
    ``start_recording()`` (which returns once the module-level ``stop_event``
    is set), then await ``stop_recording()`` to write the file.
    """

    # Number of frames requested from the stream per read.
    CHUNK_FRAMES = 1024

    def __init__(self):
        self.filename = None  # output path; must be assigned before stopping
        self.sample_rate = 48000
        self.channels = 2
        self.format = pyaudio.paInt16
        self.frames = []  # raw chunks returned by stream.read()
        self.audio = pyaudio.PyAudio()
        self.stream = None

    async def start_recording(self):
        """Open the input stream and collect chunks until ``stop_event`` is set.

        At least one chunk is always read before the stop flag is checked.
        """
        self.frames = []
        self.stream = self.audio.open(
            format=self.format,
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            frames_per_buffer=self.CHUNK_FRAMES,
        )
        print("Recording...")
        while True:
            chunk = self.stream.read(
                self.CHUNK_FRAMES, exception_on_overflow=False
            )
            self.frames.append(chunk)
            if stop_event.is_set():
                break
            # stream.read() blocks; yield once per chunk so the event loop
            # can run other pending callbacks between reads.
            await asyncio.sleep(0)

    async def stop_recording(self):
        """Close the stream and write the captured frames to ``filename``.

        Does nothing when there is no active stream, so calling it twice (or
        before ``start_recording``) is harmless.
        """
        stream = self.stream
        if not (stream and stream.is_active()):
            return

        print("Recording complete.")
        stream.stop_stream()
        stream.close()
        # Drop the reference so a repeated call never touches a closed stream.
        self.stream = None

        try:
            # The context manager closes the file even if a write fails.
            with wave.open(str(self.filename), "wb") as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(self.audio.get_sample_size(self.format))
                wf.setframerate(self.sample_rate)
                wf.writeframes(b"".join(self.frames))
        finally:
            self.audio.terminate()
async def query(filename, language='en'):
    """Send an audio file to the inference API and return the parsed JSON.

    Args:
        filename: Path of the audio file to upload.
        language: Value sent as the ``language`` query parameter; a falsy
            value omits the parameter.

    Returns:
        The decoded JSON body of the first HTTP 200 response that contains
        no ``'error'`` key.

    Raises:
        Exception: If no successful response is obtained after 5 attempts.
    """
    max_retries = 5

    # The payload and parameters are identical for every attempt, so build
    # them once instead of re-reading the file inside the retry loop.
    with open(filename, "rb") as f:
        data = f.read()
    params = {'language': language} if language else {}

    for _ in range(max_retries):
        response = requests.post(API_URL, headers=headers, data=data, params=params)
        if response.status_code == 200:
            result = response.json()
            if 'error' not in result:
                return result
            if 'estimated_time' in result:
                wait_time = result['estimated_time'] + 5  # Adding some buffer time
                print(f"Model is loading, retrying in {wait_time:.2f} seconds...")
            else:
                wait_time = 5
                print(f"Error in response: {result['error']}. Retrying in 5 seconds...")
        else:
            wait_time = 5
            print(f"Failed to query API: {response.status_code}. Retrying in 5 seconds...")
        # asyncio.sleep keeps the event loop responsive while waiting; the
        # original called time.sleep without importing time (NameError).
        await asyncio.sleep(wait_time)

    raise Exception("Failed to obtain a successful response from the API after several attempts.")
# Shared stop flag: set by signal_handler on Ctrl+C and polled by
# AsyncAudioRecorder.start_recording to end the capture loop.
stop_event = asyncio.Event()
async def main(filename, language=None):
    """Record audio to ``filename``, then send it to the API and print the result.

    Args:
        filename: Path the recording is written to and then uploaded from.
        language: Optional language value forwarded to ``query``.

    Raises:
        Exception: Propagated from ``query`` when every attempt fails.
    """
    # The recorder is deliberately kept as a module-level global, as before,
    # so anything else in the module that looks it up still finds it.
    global recorder
    recorder = AsyncAudioRecorder()
    recorder.filename = filename

    await recorder.start_recording()
    await recorder.stop_recording()

    print("Querying the recorded file...")
    # query() already retries internally. The previous try/except/finally
    # called it again in every clause, which uploaded the audio at least
    # twice, discarded the first answer, and hid errors behind a bare except.
    result = await query(filename, language)
    print("Query Result:", result)
def signal_handler(signal, frame):
    """Signal callback that asks the recording loop to stop.

    Both arguments are supplied when the handler is invoked and are not
    used here. Note that the ``signal`` parameter shadows the ``signal``
    module inside this function.
    """
    print("Ctrl+C detected, stopping...")
    # The recording loop polls this event and exits once it is set.
    stop_event.set()
if __name__ == '__main__':
    # Command-line entry point: parse arguments, install the Ctrl+C handler,
    # then run the record-and-transcribe coroutine to completion.
    parser = argparse.ArgumentParser(description="Async audio recorder.")
    parser.add_argument('-f', '--filename', required=True, help='Filename to save the recording.')
    parser.add_argument('-l', '--language', help='Language for audio translation, please use a two char country code like "en" (optional, does not work properly).')
    args = parser.parse_args()

    # Install the handler before recording starts so Ctrl+C sets stop_event
    # instead of raising KeyboardInterrupt.
    signal.signal(signal.SIGINT, signal_handler)

    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running and raises RuntimeError on recent Python versions.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main(args.filename, args.language))
    finally:
        loop.close()