在生成对话文字时,同时在后台生成tts音频,增加朗读音色选择,增加博物馆的概况接口

This commit is contained in:
qcloud
2025-02-23 09:52:30 +08:00
parent c88312a914
commit a5e83f4d3b
7 changed files with 653 additions and 224 deletions

View File

@@ -19,22 +19,22 @@ import logging
from copy import deepcopy
from uuid import uuid4
from api.db import LLMType
from flask import request, Response, jsonify
from flask import request, Response, jsonify, stream_with_context
from api.db.services.dialog_service import ask
from agent.canvas import Canvas
from api.db import StatusEnum
from api.db.db_models import API4Conversation
from api.db.services.api_service import API4ConversationService
from api.db.services.canvas_service import UserCanvasService
from api.db.services.dialog_service import DialogService, ConversationService, chat
from api.db.services.dialog_service import DialogService, ConversationService, chat,stream_manager
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils import get_uuid
from api.utils.api_utils import get_error_data_result
from api.utils.api_utils import get_result, token_required
from api.db.services.llm_service import LLMBundle
import uuid
import queue
import queue,time
from threading import Lock,Thread
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
@token_required
@@ -239,186 +239,6 @@ def completion(tenant_id, chat_id): # chat_id 和 别的文件中的dialog_id
break
return get_result(data=answer)
# 全角字符到半角字符的映射
def fullwidth_to_halfwidth(s):
full_to_half_map = {
'': '!', '': '"', '': '#', '': '$', '': '%', '': '&', '': "'",
'': '(', '': ')', '': '*', '': '+', '': ',', '': '-', '': '.',
'': '/', '': ':', '': ';', '': '<', '': '=', '': '>', '': '?',
'': '@', '': '[', '': '\\', '': ']', '': '^', '_': '_', '': '`',
'': '{', '': '|', '': '}', '': '~', '': '', '': '', '': '',
'': '', '': ',', '': '.', '': '-', '': '.', '': '', '': '',
'': '', '': '', '': ':'
}
return ''.join(full_to_half_map.get(char, char) for char in s)
def is_dale(s):
full_to_half_map = {
'': '!', '': '"', '': '#', '': '$', '': '%', '': '&', '': "'",
'': '(', '': ')', '': '*', '': '+', '': ',', '': '-', '': '.',
'': '/', '': ':', '': ';', '': '<', '': '=', '': '>', '': '?',
'': '@', '': '[', '': '\\', '': ']', '': '^', '_': '_', '': '`',
'': '{', '': '|', '': '}', '': '~', '': '', '': '', '': '',
'': '', '': ',', '': '.', '': '-', '': '.', '': '', '': '',
'': '', '': '', '': ':', '': '.'
}
def extract_text_from_markdown(markdown_text):
# 移除Markdown标题
text = re.sub(r'#\s*[^#]+', '', markdown_text)
# 移除内联代码块
text = re.sub(r'`[^`]+`', '', text)
# 移除代码块
text = re.sub(r'```[\s\S]*?```', '', text)
# 移除加粗和斜体
text = re.sub(r'[*_]{1,3}(?=\S)(.*?\S[*_]{1,3})', '', text)
# 移除链接
text = re.sub(r'\[.*?\]\(.*?\)', '', text)
# 移除图片
text = re.sub(r'!\[.*?\]\(.*?\)', '', text)
# 移除HTML标签
text = re.sub(r'<[^>]+>', '', text)
# 转换标点符号
# text = re.sub(r'[^\w\s]', '', text)
text = fullwidth_to_halfwidth(text)
# 移除多余的空格
text = re.sub(r'\s+', ' ', text).strip()
return text
def split_text_at_punctuation(text, chunk_size=100):
# 使用正则表达式找到所有的标点符号和特殊字符
punctuation_pattern = r'[\s,.!?;:\-\\(\)\[\]{}"\'\\\/]+'
tokens = re.split(punctuation_pattern, text)
# 移除空字符串
tokens = [token for token in tokens if token]
# 存储最终的文本块
chunks = []
current_chunk = ''
for token in tokens:
if len(current_chunk) + len(token) <= chunk_size:
# 如果添加当前token后长度不超过chunk_size则添加到当前块
current_chunk += (token + ' ')
else:
# 如果长度超过chunk_size则将当前块添加到chunks列表并开始新块
chunks.append(current_chunk.strip())
current_chunk = token + ' '
# 添加最后一个块(如果有剩余)
if current_chunk:
chunks.append(current_chunk.strip())
return chunks
audio_text_cache = {}
@manager.route('/chats/<chat_id>/tts/<audio_stream_id>', methods=['GET'])
def dialog_tts_get(chat_id, audio_stream_id):
tts_info = audio_text_cache.pop(audio_stream_id, None)
req = tts_info
if not req:
return get_error_data_result(message="Audio stream not found or expired.")
audio_stream = req.get('audio_stream')
tenant_id = req.get('tenant_id')
chat_id = req.get('chat_id')
text = req.get('text', "..")
model_name = req.get('model_name')
dia = DialogService.get(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
if not dia:
return get_error_data_result(message="You do not own the chat")
tts_model_name = dia.tts_id
if model_name: tts_model_name = model_name
tts_mdl = LLMBundle(dia.tenant_id, LLMType.TTS, tts_model_name) # dia.tts_id)
def stream_audio():
try:
for chunk in tts_mdl.tts(text):
yield chunk
except Exception as e:
yield ("data:" + json.dumps({"code": 500, "message": str(e),
"data": {"answer": "**ERROR**: " + str(e)}},
ensure_ascii=False)).encode('utf-8')
def generate():
data = audio_stream.read(1024)
while data:
yield data
data = audio_stream.read(1024)
if audio_stream:
# 确保流的位置在开始处
audio_stream.seek(0)
resp = Response(generate(), mimetype="audio/mpeg")
else:
resp = Response(stream_audio(), mimetype="audio/mpeg")
resp.headers.add_header("Cache-Control", "no-cache")
resp.headers.add_header("Connection", "keep-alive")
resp.headers.add_header("X-Accel-Buffering", "no")
return resp
@manager.route('/chats/<chat_id>/tts', methods=['POST'])
@token_required
def dialog_tts_post(tenant_id, chat_id):
req = request.json
if not req.get("text"):
return get_error_data_result(message="Please input your question.")
delay_gen_audio = req.get('delay_gen_audio', False)
# text = extract_text_from_markdown(req.get('text'))
text = req.get('text')
audio_stream_id = req.get('audio_stream_id')
# logging.info(f"request tts audio url:{text} audio_stream_id:{audio_stream_id} ")
if audio_stream_id is None:
audio_stream_id = str(uuid.uuid4())
# 在这里生成音频流并存储到内存中
model_name = req.get('model_name')
dia = DialogService.get(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
tts_model_name = dia.tts_id
if model_name: tts_model_name = model_name
logging.info(f"---tts {tts_model_name}")
tts_mdl = LLMBundle(dia.tenant_id, LLMType.TTS, tts_model_name) # dia.tts_id)
if delay_gen_audio:
audio_stream = None
else:
audio_stream = io.BytesIO()
audio_text_cache[audio_stream_id] = {'text': text, 'chat_id': chat_id, "tenant_id": tenant_id,
'audio_stream': audio_stream,'model_name':model_name} # 缓存文本以便后续生成音频流
if delay_gen_audio is False:
try:
"""
for txt in re.split(r"[,。/《》?;:!\n\r:;]+", text):
try:
if txt is None or txt.strip() == "":
continue
for chunk in tts_mdl.tts(txt):
audio_stream.write(chunk)
except Exception as e:
continue
"""
if text is None or text.strip() == "":
audio_stream.write(b'\x00' * 100)
else:
for chunk in tts_mdl.tts(text):
audio_stream.write(chunk)
except Exception as e:
return get_error_data_result(message="get tts audio stream error.")
# 构建音频流URL
audio_stream_url = f"/chats/{chat_id}/tts/{audio_stream_id}"
logging.info(f"--return request tts audio url {audio_stream_id} {audio_stream_url}")
# 返回音频流URL
return jsonify({"tts_url": audio_stream_url, "audio_stream_id": audio_stream_id})
@manager.route('/agents/<agent_id>/completions', methods=['POST'])
@token_required
def agent_completion(tenant_id, agent_id):