从新提交到gitee 仓库
This commit is contained in:
@@ -70,6 +70,8 @@ class Base(ABC):
|
||||
def tts(self, audio):
    """Placeholder synthesis hook: performs no work and returns ``None``."""
    return None
|
||||
|
||||
def end_tts(self):
    """Placeholder end-of-synthesis hook: performs no work and returns ``None``."""
    return None
|
||||
def normalize_text(self, text):
    """Return ``text`` with markup markers removed.

    Every ``**``, every ``##<digits>$$`` token, and every remaining ``#``
    character is deleted; all other characters are kept unchanged.
    """
    marker_pattern = re.compile(r'(\*\*|##\d+\$\$|#)')
    return marker_pattern.sub('', text)
|
||||
|
||||
@@ -118,15 +120,30 @@ class FishAudioTTS(Base):
|
||||
class QwenTTS(Base):
|
||||
def __init__(self, key, model_name, base_url=""):
    """Store the DashScope credentials and detect CosyVoice model names.

    Args:
        key: API key; assigned to ``dashscope.api_key``.
        model_name: Model identifier. A value of the form
            ``"cosyvoice-v1/<voice>"`` switches the instance to CosyVoice
            mode and stores ``<voice>`` as the voice name.
        base_url: Accepted for signature compatibility; not used here.
    """
    import dashscope

    print("---begin--init QwenTTS--") # cyx
    dashscope.api_key = key
    self.model_name = model_name

    # Streaming state; starts empty.
    self.synthesizer = None
    self.callback = None

    # Defaults used when the model name is not a CosyVoice "<model>/<voice>" pair.
    self.is_cosyvoice = False
    self.voice = ""
    if '/' not in model_name:
        return

    # Split once on the first slash into the model family and the voice name.
    family, voice_name = model_name.split('/', 1)
    if family == 'cosyvoice-v1':
        self.is_cosyvoice = True
        self.voice = voice_name
|
||||
def tts(self, text):
|
||||
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
|
||||
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
|
||||
from collections import deque
|
||||
|
||||
if self.is_cosyvoice is False:
|
||||
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
|
||||
from collections import deque
|
||||
else:
|
||||
# cyx 2025 01 19 测试cosyvoice 使用tts_v2 版本
|
||||
from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat #, SpeechSynthesisResult
|
||||
from dashscope.audio.tts import SpeechSynthesisResult
|
||||
from collections import deque
|
||||
# print(f"--QwenTTS--tts_stream begin-- {text}") # cyx
|
||||
class Callback(ResultCallback):
|
||||
def __init__(self) -> None:
|
||||
self.dque = deque()
|
||||
@@ -149,6 +166,7 @@ class QwenTTS(Base):
|
||||
self.dque.append(None)
|
||||
|
||||
def on_error(self, response: SpeechSynthesisResponse):
|
||||
print("Qwen tts error", str(response))
|
||||
raise RuntimeError(str(response))
|
||||
|
||||
def on_close(self):
|
||||
@@ -158,20 +176,91 @@ class QwenTTS(Base):
|
||||
if result.get_audio_frame() is not None:
|
||||
self.dque.append(result.get_audio_frame())
|
||||
|
||||
|
||||
|
||||
# --------------------------
|
||||
|
||||
class Callback_v2(ResultCallback):
    """Queue-backed callback for the CosyVoice (tts_v2) synthesizer.

    Audio chunks received through ``on_data`` are buffered in a deque and
    handed out by ``_run``. A ``None`` entry marks the end of the stream.
    """

    def __init__(self) -> None:
        self.dque = deque()
        # Text of the failure reported through on_error, or None if no error.
        self._error = None

    def _run(self):
        """Yield buffered audio chunks until the end-of-stream marker arrives.

        Raises:
            RuntimeError: if ``on_error`` recorded a failure before the
                stream ended.
        """
        while True:
            if not self.dque:
                # Nothing buffered yet: give up the time slice and poll again.
                time.sleep(0)
                continue
            val = self.dque.popleft()
            if val:
                yield val
                continue
            # A falsy entry ends the stream; surface a recorded failure to
            # the consumer instead of ending silently.
            if self._error is not None:
                raise RuntimeError(self._error)
            break

    def on_open(self):
        pass

    def on_complete(self):
        # Enqueue the end-of-stream marker so _run stops iterating.
        self.dque.append(None)

    def on_error(self, response: SpeechSynthesisResponse):
        print("Qwen tts error", str(response))
        # Record the failure and enqueue the end-of-stream marker before
        # raising; otherwise _run would keep polling an empty queue forever,
        # because nothing else appends the marker on this path.
        self._error = str(response)
        self.dque.append(None)
        raise RuntimeError(str(response))

    def on_close(self):
        print("---Qwen call back close") # cyx

    def on_event(self, message):
        # Event messages are ignored; audio is collected in on_data.
        pass

    # Audio delivery hook used with the CosyVoice speech model.
    def on_data(self, data: bytes) -> None:
        # Skip empty payloads: a falsy queue entry would be read as end of stream.
        if len(data) > 0:
            self.dque.append(data)
|
||||
|
||||
|
||||
# --------------------------
|
||||
text = self.normalize_text(text)
|
||||
callback = Callback()
|
||||
SpeechSynthesizer.call(model=self.model_name,
|
||||
text=text,
|
||||
callback=callback,
|
||||
format="mp3")
|
||||
|
||||
|
||||
try:
|
||||
for data in callback._run():
|
||||
#if self.model_name != 'cosyvoice-v1':
|
||||
if self.is_cosyvoice is False:
|
||||
self.callback = Callback()
|
||||
SpeechSynthesizer.call(model=self.model_name,
|
||||
text=text,
|
||||
callback=self.callback,
|
||||
format="mp3")
|
||||
else:
|
||||
self.callback = Callback_v2()
|
||||
self.synthesizer = SpeechSynthesizer(
|
||||
model='cosyvoice-v1',
|
||||
# voice="longyuan", #"longfei",
|
||||
voice = self.voice,
|
||||
callback=self.callback,
|
||||
format=AudioFormat.MP3_44100HZ_MONO_256KBPS
|
||||
)
|
||||
|
||||
self.synthesizer.call(text)
|
||||
except Exception as e:
|
||||
print(f"---dale---20 error {e}") # cyx
|
||||
# -----------------------------------
|
||||
try:
|
||||
for data in self.callback._run():
|
||||
yield data
|
||||
print(f"---Qwen return data {num_tokens_from_string(text)}")
|
||||
yield num_tokens_from_string(text)
|
||||
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"**ERROR**: {e}")
|
||||
raise RuntimeError(f"**ERROR**: {e}")
|
||||
|
||||
def end_tts(self):
    """Call ``streaming_complete()`` on the stored synthesizer, if one is set."""
    active = self.synthesizer
    if not active:
        # No synthesizer stored; nothing to finish.
        return
    active.streaming_complete()
|
||||
|
||||
class OpenAITTS(Base):
|
||||
def __init__(self, key, model_name="tts-1", base_url="https://api.openai.com/v1"):
|
||||
|
||||
Reference in New Issue
Block a user