微信小程序后台经过5天测试,已具备一定的稳定性,提交此版本用于保存

This commit is contained in:
qcloud
2025-07-27 21:37:28 +08:00
parent 44cb7c0dca
commit e6644a5262
18 changed files with 3147 additions and 376 deletions

View File

@@ -31,7 +31,7 @@ from api.utils.api_utils import get_result
from api.utils.file_utils import get_project_base_directory
from rag.utils.minio_conn import RAGFlowMinio
import logging
import base64, gzip
import base64, gzip,json
import io, re, json
from io import BytesIO
import queue,time,uuid,os,array
@@ -135,13 +135,13 @@ def upload_file(tenant_id,mesum_id):
mesum_id_str = str(mesum_id)
labels_with_id = get_labels_with_id(mesum_id)
antique_labels = ','.join([item['label'] for item in labels_with_id])
antique_labels = ';'.join([item['label'] for item in labels_with_id]) # 使用分号分隔
joined_string = antique_labels
antiques_selected = f"{joined_string}"
#logging.info(f"mesumid={mesum_id} {joined_string}")
prompt1 = (f"你是一名图片识别和理解助手"
prompt1= (f"你是一名图片识别和理解助手"
f"任务是先识别图片中文字,然后理解文字中包含的内容,分析哪一项可以作为识别出文字的标题,"
f"你的回答有3个结果第一个结果匹配出的结果,JSON键值为antique"
f"从下面的候选项:{antiques_selected}进行匹配,每一个候选项中间以';'分割,如果没有任何匹配则结果为'',以免误触发讲解,匹配成功则输出匹配出的内容"
@@ -151,25 +151,29 @@ def upload_file(tenant_id,mesum_id):
f"原始数据的键值为text输出是1个完整的JSON数据不要有多余的前置和后置内容确保前端能正确解析出JSON数据")
prompt = (
f"作为图片识别和理解助手,您的任务是:"
f"\n1. 图片基本上就是展品标题、历史人物或者历史事件"
f"\n2. 精确识别图片中的文字内容,理解文字语义,重点分析字体较大的文字"
f"\n3. 识别出的文字包含标题或者接近于标题的文字"
f"\n4. 从以下候选标题中选择最佳匹配项:"
f"\n {antiques_selected}"
f"\n\n### 输出要求:"
f"\n- 以严格JSON格式输出包含3个字段"
f"\n • `antique`: 匹配的标题(多个用英文分号';'分割最多匹配3个无匹配则空字符串"
f"\n • `text`: 识别出的完整文字"
f"\n • `match_score`: 整体匹配度(0-1的浮点数)1=完全匹配"
f"\n\n### 匹配规则:"
f"\n1. 语义匹配优先于字面匹配"
f"\n2. 考虑同义词、近义词和描述性匹配"
f"\n3. 允许部分匹配(如'青铜酒器'匹配'青铜器'"
f"\n4. 若无明确匹配项,`antique`返回空字符串"
f"\n\n### 重要:"
f"\n- 输出必须是可直接解析的JSON无任何前置/后置文本"
f"\n- 匹配度评分需客观反映文本与候选标题的相似度"
f"作为博物馆展品识别专家,您的任务是:"
f"\n1. 识别图片中的文字内容,重点关注展品标题(通常是最大/最显眼的文字)"
f"\n2. 从以下候选标题中匹配最佳项:{antiques_selected}"
f"\n3. 匹配规则:"
f"\n - 优先匹配完整标题(如'铜踵饰残片'匹配'铜踵饰残片'"
f"\n - 其次匹配关键词(如'刻辞卜骨'可匹配'刻辞卜骨'"
f"\n - 允许部分匹配(如'铜器'匹配'青铜器'"
f"\n - 忽略拼音、英文和次要描述文字"
f"\n - 如果近似,不好区分,则输出数组供前端选择,如:青铜车䡇匹配青铜车䡇;青铜车軛一对"
f"\n4. 输出要求:"
f"\n - 匹配结果最多不超过5个"
f"\n - 用英文分号';'分隔多个匹配项"
f"\n\n输出严格JSON格式"
f"\n{'{'}"
f"\n \"antique\": \"匹配结果(多个用分号分隔)\","
f"\n \"text\": \"识别出的完整文字\","
f"\n \"match_score\": 整体匹配度(0-1)"
f"\n{'}'}"
f"\n\n示例:"
f"\n候选标题:青铜车䡇;玉虎;甲骨文;刻辞卜骨"
f"\n识别文字:『青铜车䡇』商代..."
f"\n正确输出:"
f"\n{'{'}\"antique\": \"青铜车轼\", \"text\": \"青铜车轼 (yue)...\", \"match_score\": 0.95{'}'}"
)
file = request.files['file']
@@ -212,7 +216,7 @@ def upload_file(tenant_id,mesum_id):
]
)
"""
client = OpenAI(
api_key="sk-a47a3fb5f4a94f66bbaf713779101c75",
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
@@ -238,20 +242,58 @@ def upload_file(tenant_id,mesum_id):
},
],
)
"""
vl_model = "doubao-1-5-thinking-vision-pro-250428"
client = OpenAI(
api_key="1e04d30a-0c56-4dbd-b873-53f26649c64f",
base_url="https://ark.cn-beijing.volces.com/api/v3",
)
response = client.chat.completions.create(
# 指定您创建的方舟推理接入点 ID此处已帮您修改为您的推理接入点 ID
model=vl_model,
messages=[
{
"role": "system",
"content": [{"type": "text", "text": prompt}],
},
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{img_base}"
},
}
],
}
],
)
message = response.choices[0].message
parsed_json_res = parse_markdown_json(message.content)
parsed_json_data = {"antique": "", "text": "", "match_score": 0}
matchedArray = []
try:
if isinstance(message.content, str):
parsed_json_res = parse_markdown_json(message.content) # 优先识别带有markdown格式
if parsed_json_res.get('success') is False: # 如果识别失败再识别普通json格式(字符串)
parsed_json_data = json.loads(message.content)
parsed_json_res['success'] = True
parsed_json_res['data'] = parsed_json_data
except Exception as e:
pass
#logging.info(f"识别完成 {message.content} {parsed_json_data} ")
if parsed_json_res.get('success') is True:
parsed_json_data = parsed_json_res.get('data')
matchedAntiqueArray = parsed_json_data.get('antique').split(';') # 识别出的文物的数组,中间以';'分割,可能有多个
if len(matchedAntiqueArray) ==1: # 只有一个匹配项,直接返回
logging.info(f"识别完成 得到1个{parsed_json_data.get('antique')} {labels_with_id} ")
for item in labels_with_id:
if item['label'] == parsed_json_data.get('antique'):
parsed_json_data['id'] = item.get('id')
else: # 有多个匹配项,需要进行多个匹配
for label in matchedAntiqueArray:
for label in matchedAntiqueArray[:5]:
antique= {'label':label}
for item in labels_with_id:
if item['label'] == label:

View File

@@ -115,6 +115,7 @@ def update(tenant_id, chat_id, session_id):
@token_required
def completion(tenant_id, chat_id): # chat_id 和 别的文件中的dialog_id 应该是一个意思? cyx 2025-01-25
req = request.json
logging.info(f"/chats/{chat_id}/completions--0 req={req}") # cyx
if not req.get("session_id"): # session_id 和 别的文件中的conversation_id 应该是一个意思? cyx 2025-01-25
conv = {
"id": get_uuid(),