在生成对话文字时,同时在后台生成tts音频,增加朗读音色选择,增加博物馆的概况接口
This commit is contained in:
@@ -19,22 +19,22 @@ import logging
|
||||
from copy import deepcopy
|
||||
from uuid import uuid4
|
||||
from api.db import LLMType
|
||||
from flask import request, Response, jsonify
|
||||
from flask import request, Response, jsonify, stream_with_context
|
||||
from api.db.services.dialog_service import ask
|
||||
from agent.canvas import Canvas
|
||||
from api.db import StatusEnum
|
||||
from api.db.db_models import API4Conversation
|
||||
from api.db.services.api_service import API4ConversationService
|
||||
from api.db.services.canvas_service import UserCanvasService
|
||||
from api.db.services.dialog_service import DialogService, ConversationService, chat
|
||||
from api.db.services.dialog_service import DialogService, ConversationService, chat,stream_manager
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.utils import get_uuid
|
||||
from api.utils.api_utils import get_error_data_result
|
||||
from api.utils.api_utils import get_result, token_required
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
import uuid
|
||||
import queue
|
||||
|
||||
import queue,time
|
||||
from threading import Lock,Thread
|
||||
|
||||
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
|
||||
@token_required
|
||||
@@ -239,186 +239,6 @@ def completion(tenant_id, chat_id): # chat_id 和 别的文件中的dialog_id
|
||||
break
|
||||
return get_result(data=answer)
|
||||
|
||||
|
||||
# 全角字符到半角字符的映射
|
||||
|
||||
|
||||
# ASCII punctuation whose full-width twins sit exactly 0xFEE0 code points
# higher (U+FF01..U+FF5E, letters and digits excluded on purpose).
_ASCII_PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

# Translation table keyed by code point so that the source file stays
# unambiguous even if it is re-encoded or Unicode-normalized.
_FULLWIDTH_TO_HALFWIDTH = {ord(ch) + 0xFEE0: ch for ch in _ASCII_PUNCTUATION}
_FULLWIDTH_TO_HALFWIDTH.update({
    0xFF61: ".",  # halfwidth ideographic full stop
    0xFF64: ",",  # halfwidth ideographic comma
    0xFF65: ".",  # halfwidth katakana middle dot
    0xFF70: "-",  # halfwidth prolonged sound mark
    0x3002: ".",  # ideographic full stop
    0x30FC: "-",  # katakana prolonged sound mark
})


def fullwidth_to_halfwidth(s):
    """Return *s* with full-width punctuation replaced by ASCII punctuation.

    Only punctuation and symbols are converted; full-width letters and
    digits, CJK text and every other character are passed through
    unchanged.  The ideographic comma (U+3001) and the katakana middle dot
    (U+30FB) are also left untouched.

    NOTE(review): the previous literal table listed keys that were
    identical to their values, contained unescaped quote and backslash
    keys, and repeated some keys with conflicting values, which suggests
    the text was Unicode-normalized somewhere in transit.  This table is a
    reconstruction of the apparent intent; confirm the halfwidth CJK
    entries against the authoritative copy of the file.

    Args:
        s: The string to convert.

    Returns:
        A new string of the same length with the mapped characters
        replaced.
    """
    return s.translate(_FULLWIDTH_TO_HALFWIDTH)
|
||||
|
||||
|
||||
def is_dale(s):
    """Placeholder that performs no check and always returns ``None``.

    The previous body only built a punctuation lookup table that was never
    read, and then fell off the end of the function.  The unused table has
    been dropped; the observable result (``None`` for every input) is
    unchanged.

    NOTE(review): the intended predicate is not evident from this file;
    either implement it or remove the function once callers are confirmed
    absent.

    Args:
        s: Ignored.

    Returns:
        None.
    """
    return None
|
||||
|
||||
|
||||
def extract_text_from_markdown(markdown_text):
    """Strip Markdown/HTML markup from *markdown_text* and return plain text.

    Processing steps, in order:

    1. Fenced code blocks are removed, then inline code spans.  Fences go
       first so the inline-code pattern cannot consume part of a fence and
       leave stray backticks behind.
    2. Heading lines (lines starting with ``#``) are removed.  The match is
       confined to a single line so the paragraph that follows a heading is
       kept.
    3. Emphasised spans (``*``/``_`` delimited) are removed together with
       their content.
    4. Images are removed, then links.  Images go first so the link pattern
       cannot strip ``[alt](url)`` and leave the leading ``!``.
    5. HTML tags are removed.
    6. Full-width punctuation is converted via ``fullwidth_to_halfwidth``.
    7. Runs of whitespace are collapsed to one space and the result is
       trimmed.

    NOTE(review): steps 3 and 4 delete the emphasised/link text itself, not
    just the markers.  That matches the previous behaviour; confirm it is
    what the text-to-speech caller wants.

    Args:
        markdown_text: Markdown source string.

    Returns:
        The cleaned, single-line plain-text string.
    """
    # Code: fenced blocks before inline spans.
    text = re.sub(r'```[\s\S]*?```', '', markdown_text)
    text = re.sub(r'`[^`]+`', '', text)
    # Headings: drop only the heading line itself, never the following body.
    text = re.sub(r'^[ \t]*#+[^\n]*', '', text, flags=re.MULTILINE)
    # Bold / italic spans.
    text = re.sub(r'[*_]{1,3}(?=\S)(.*?\S[*_]{1,3})', '', text)
    # Images before links, because an image is a link prefixed with '!'.
    text = re.sub(r'!\[.*?\]\(.*?\)', '', text)
    text = re.sub(r'\[.*?\]\(.*?\)', '', text)
    # HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Normalize punctuation width.
    text = fullwidth_to_halfwidth(text)
    # Collapse whitespace.
    text = re.sub(r'\s+', ' ', text).strip()

    return text
|
||||
|
||||
|
||||
def split_text_at_punctuation(text, chunk_size=100):
    """Split *text* into chunks of roughly *chunk_size* characters.

    The text is first tokenized on runs of whitespace and punctuation (the
    separators themselves are discarded).  Tokens are then packed greedily:
    a token joins the current chunk while the chunk (including its trailing
    separator space) plus the token does not exceed *chunk_size*; otherwise
    a new chunk is started.  Tokens inside a chunk are joined by a single
    space.

    A single token longer than *chunk_size* is never broken up; it becomes
    a chunk of its own.  No empty chunks are emitted (previously an
    over-long first token produced a leading ``''`` entry).

    Args:
        text: The string to split.
        chunk_size: Target maximum chunk length in characters.

    Returns:
        A list of non-empty chunk strings; ``[]`` when *text* contains no
        tokens.
    """
    punctuation_pattern = r'[\s,.!?;:\-\—\(\)\[\]{}"\'\\\/]+'
    tokens = [token for token in re.split(punctuation_pattern, text) if token]

    chunks = []
    current_chunk = ''
    for token in tokens:
        if len(current_chunk) + len(token) <= chunk_size:
            current_chunk += token + ' '
            continue
        # The token does not fit: flush what has been collected so far.
        # The guard avoids emitting '' when nothing was collected yet.
        if current_chunk:
            chunks.append(current_chunk.strip())
        current_chunk = token + ' '

    if current_chunk:
        chunks.append(current_chunk.strip())

    return chunks
|
||||
|
||||
# Pending TTS requests keyed by audio_stream_id.  Each value is a dict with
# the keys 'text', 'chat_id', 'tenant_id', 'audio_stream' and 'model_name';
# an entry is consumed (popped) the first time it is fetched.
audio_text_cache = {}


@manager.route('/chats/<chat_id>/tts/<audio_stream_id>', methods=['GET'])
def dialog_tts_get(chat_id, audio_stream_id):
    """Serve the audio registered under *audio_stream_id* as an MPEG stream.

    The cache entry is popped, so each id can be fetched once.  If the entry
    carries a pre-rendered audio buffer, that buffer is streamed in 1 KiB
    pieces; otherwise speech is synthesized on the fly from the cached text.

    Note that the ``chat_id`` from the URL is replaced by the chat id stored
    in the cache entry before the dialog lookup.

    Returns:
        A streaming ``audio/mpeg`` response, or an error result when the id
        is unknown or the dialog lookup yields nothing.
    """
    cached = audio_text_cache.pop(audio_stream_id, None)
    if not cached:
        return get_error_data_result(message="Audio stream not found or expired.")

    buffered_audio = cached.get('audio_stream')
    owner_tenant_id = cached.get('tenant_id')
    chat_id = cached.get('chat_id')
    speech_text = cached.get('text', "..")
    requested_model = cached.get('model_name')

    dialog = DialogService.get(id=chat_id, tenant_id=owner_tenant_id, status=StatusEnum.VALID.value)
    if not dialog:
        return get_error_data_result(message="You do not own the chat")

    # An explicitly requested model overrides the dialog's configured one.
    tts_model_name = requested_model if requested_model else dialog.tts_id
    tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS, tts_model_name)

    def synthesize_on_demand():
        # Generate speech lazily; a failure is reported in-band as a
        # JSON "data:" payload.
        try:
            for piece in tts_mdl.tts(speech_text):
                yield piece
        except Exception as exc:
            error_payload = {
                "code": 500,
                "message": str(exc),
                "data": {"answer": "**ERROR**: " + str(exc)},
            }
            yield ("data:" + json.dumps(error_payload, ensure_ascii=False)).encode('utf-8')

    def replay_buffer():
        # Drain the pre-rendered buffer in fixed-size reads.
        while True:
            piece = buffered_audio.read(1024)
            if not piece:
                break
            yield piece

    if buffered_audio:
        # Rewind so the buffer is read from its beginning.
        buffered_audio.seek(0)
        body = replay_buffer()
    else:
        body = synthesize_on_demand()

    resp = Response(body, mimetype="audio/mpeg")
    for header_name, header_value in (
        ("Cache-Control", "no-cache"),
        ("Connection", "keep-alive"),
        ("X-Accel-Buffering", "no"),
    ):
        resp.headers.add_header(header_name, header_value)
    return resp
|
||||
|
||||
|
||||
@manager.route('/chats/<chat_id>/tts', methods=['POST'])
@token_required
def dialog_tts_post(tenant_id, chat_id):
    """Register a text-to-speech request and return the URL of its audio.

    Request JSON fields read here:
        text: Text to speak (required, must be non-empty).
        delay_gen_audio: When truthy, only the text is cached and the audio
            is synthesized later, when the returned URL is fetched.  When
            falsy or absent, the audio is synthesized now into an in-memory
            buffer.
        audio_stream_id: Optional caller-chosen id; a UUID4 string is
            generated when absent.
        model_name: Optional TTS model overriding the dialog's ``tts_id``.

    The cache entry is stored only after any eager synthesis has succeeded,
    so a failed request leaves nothing behind in ``audio_text_cache``.

    Returns:
        JSON ``{"tts_url": ..., "audio_stream_id": ...}`` on success, or an
        error result when the text is missing, the dialog lookup yields
        nothing, or synthesis fails.
    """
    req = request.json
    if not req.get("text"):
        return get_error_data_result(message="Please input your question.")

    # Normalize once so that every falsy value (False, 0, None, "") means
    # "synthesize now"; previously only the literal False did, while other
    # falsy values cached an empty buffer.
    delay_gen_audio = bool(req.get('delay_gen_audio', False))
    text = req.get('text')
    audio_stream_id = req.get('audio_stream_id')
    if audio_stream_id is None:
        audio_stream_id = str(uuid.uuid4())
    model_name = req.get('model_name')

    dia = DialogService.get(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
    if not dia:
        # Same guard and message as the GET handler uses.
        return get_error_data_result(message="You do not own the chat")

    tts_model_name = model_name if model_name else dia.tts_id
    logging.info(f"---tts {tts_model_name}")
    tts_mdl = LLMBundle(dia.tenant_id, LLMType.TTS, tts_model_name)

    audio_stream = None
    if not delay_gen_audio:
        audio_stream = io.BytesIO()
        try:
            if text is None or text.strip() == "":
                # Whitespace-only text: store 100 zero bytes instead of
                # calling the TTS model.
                audio_stream.write(b'\x00' * 100)
            else:
                for chunk in tts_mdl.tts(text):
                    audio_stream.write(chunk)
        except Exception:
            logging.exception("get tts audio stream error.")
            return get_error_data_result(message="get tts audio stream error.")

    # Cache the request so the GET endpoint can serve (or synthesize) it.
    audio_text_cache[audio_stream_id] = {
        'text': text,
        'chat_id': chat_id,
        "tenant_id": tenant_id,
        'audio_stream': audio_stream,
        'model_name': model_name,
    }

    audio_stream_url = f"/chats/{chat_id}/tts/{audio_stream_id}"
    logging.info(f"--return request tts audio url {audio_stream_id} {audio_stream_url}")
    return jsonify({"tts_url": audio_stream_url, "audio_stream_id": audio_stream_id})
|
||||
|
||||
|
||||
@manager.route('/agents/<agent_id>/completions', methods=['POST'])
|
||||
@token_required
|
||||
def agent_completion(tenant_id, agent_id):
|
||||
|
||||
Reference in New Issue
Block a user