针对微信小程序的后台经过5天测试,具备一定的稳定性,提交版本用于保存
This commit is contained in:
@@ -31,7 +31,7 @@ from api.utils.api_utils import get_result
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
from rag.utils.minio_conn import RAGFlowMinio
|
||||
import logging
|
||||
import base64, gzip
|
||||
import base64, gzip,json
|
||||
import io, re, json
|
||||
from io import BytesIO
|
||||
import queue,time,uuid,os,array
|
||||
@@ -135,13 +135,13 @@ def upload_file(tenant_id,mesum_id):
|
||||
|
||||
mesum_id_str = str(mesum_id)
|
||||
labels_with_id = get_labels_with_id(mesum_id)
|
||||
antique_labels = ','.join([item['label'] for item in labels_with_id])
|
||||
antique_labels = ';'.join([item['label'] for item in labels_with_id]) # 使用分号分隔
|
||||
joined_string = antique_labels
|
||||
antiques_selected = f"{joined_string}"
|
||||
|
||||
#logging.info(f"mesumid={mesum_id} {joined_string}")
|
||||
|
||||
prompt1 = (f"你是一名图片识别和理解助手"
|
||||
prompt1= (f"你是一名图片识别和理解助手"
|
||||
f"任务是先识别图片中文字,然后理解文字中包含的内容,分析哪一项可以作为识别出文字的标题,"
|
||||
f"你的回答有3个结果,第一个结果匹配出的结果,JSON键值为antique"
|
||||
f"从下面的候选项:{antiques_selected}进行匹配,每一个候选项中间以';'分割,如果没有任何匹配则结果为'',以免误触发讲解,匹配成功则输出匹配出的内容"
|
||||
@@ -151,25 +151,29 @@ def upload_file(tenant_id,mesum_id):
|
||||
f"原始数据的键值为text,输出是1个完整的JSON数据,不要有多余的前置和后置内容,确保前端能正确解析出JSON数据")
|
||||
|
||||
prompt = (
|
||||
f"作为图片识别和理解助手,您的任务是:"
|
||||
f"\n1. 图片基本上就是展品标题、历史人物或者历史事件"
|
||||
f"\n2. 精确识别图片中的文字内容,理解文字语义,重点分析字体较大的文字"
|
||||
f"\n3. 识别出的文字包含标题或者接近于标题的文字"
|
||||
f"\n4. 从以下候选标题中选择最佳匹配项:"
|
||||
f"\n {antiques_selected}"
|
||||
f"\n\n### 输出要求:"
|
||||
f"\n- 以严格JSON格式输出,包含3个字段:"
|
||||
f"\n • `antique`: 匹配的标题(多个用英文分号';'分割,最多匹配3个,无匹配则空字符串)"
|
||||
f"\n • `text`: 识别出的完整文字"
|
||||
f"\n • `match_score`: 整体匹配度(0-1的浮点数),1=完全匹配"
|
||||
f"\n\n### 匹配规则:"
|
||||
f"\n1. 语义匹配优先于字面匹配"
|
||||
f"\n2. 考虑同义词、近义词和描述性匹配"
|
||||
f"\n3. 允许部分匹配(如'青铜酒器'匹配'青铜器')"
|
||||
f"\n4. 若无明确匹配项,`antique`返回空字符串"
|
||||
f"\n\n### 重要:"
|
||||
f"\n- 输出必须是可直接解析的JSON,无任何前置/后置文本"
|
||||
f"\n- 匹配度评分需客观反映文本与候选标题的相似度"
|
||||
f"作为博物馆展品识别专家,您的任务是:"
|
||||
f"\n1. 识别图片中的文字内容,重点关注展品标题(通常是最大/最显眼的文字)"
|
||||
f"\n2. 从以下候选标题中匹配最佳项:{antiques_selected}"
|
||||
f"\n3. 匹配规则:"
|
||||
f"\n - 优先匹配完整标题(如'铜踵饰残片'匹配'铜踵饰残片')"
|
||||
f"\n - 其次匹配关键词(如'刻辞卜骨'可匹配'刻辞卜骨')"
|
||||
f"\n - 允许部分匹配(如'铜器'匹配'青铜器')"
|
||||
f"\n - 忽略拼音、英文和次要描述文字"
|
||||
f"\n - 如果近似,不好区分,则输出数组供前端选择,如:青铜车䡇匹配青铜车䡇;青铜车軛一对"
|
||||
f"\n4. 输出要求:"
|
||||
f"\n - 匹配结果最多不超过5个"
|
||||
f"\n - 用英文分号';'分隔多个匹配项"
|
||||
f"\n\n输出严格JSON格式:"
|
||||
f"\n{'{'}"
|
||||
f"\n \"antique\": \"匹配结果(多个用分号分隔)\","
|
||||
f"\n \"text\": \"识别出的完整文字\","
|
||||
f"\n \"match_score\": 整体匹配度(0-1)"
|
||||
f"\n{'}'}"
|
||||
f"\n\n示例:"
|
||||
f"\n候选标题:青铜车䡇;玉虎;甲骨文;刻辞卜骨"
|
||||
f"\n识别文字:『青铜车䡇』商代..."
|
||||
f"\n正确输出:"
|
||||
f"\n{'{'}\"antique\": \"青铜车轼\", \"text\": \"青铜车轼 (yue)...\", \"match_score\": 0.95{'}'}"
|
||||
)
|
||||
file = request.files['file']
|
||||
|
||||
@@ -212,7 +216,7 @@ def upload_file(tenant_id,mesum_id):
|
||||
]
|
||||
)
|
||||
|
||||
"""
|
||||
|
||||
client = OpenAI(
|
||||
api_key="sk-a47a3fb5f4a94f66bbaf713779101c75",
|
||||
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
@@ -238,20 +242,58 @@ def upload_file(tenant_id,mesum_id):
|
||||
},
|
||||
],
|
||||
)
|
||||
"""
|
||||
vl_model = "doubao-1-5-thinking-vision-pro-250428"
|
||||
client = OpenAI(
|
||||
api_key="1e04d30a-0c56-4dbd-b873-53f26649c64f",
|
||||
base_url="https://ark.cn-beijing.volces.com/api/v3",
|
||||
)
|
||||
response = client.chat.completions.create(
|
||||
# 指定您创建的方舟推理接入点 ID,此处已帮您修改为您的推理接入点 ID
|
||||
model=vl_model,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": [{"type": "text", "text": prompt}],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{img_base}"
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
message = response.choices[0].message
|
||||
parsed_json_res = parse_markdown_json(message.content)
|
||||
parsed_json_data = {"antique": "", "text": "", "match_score": 0}
|
||||
matchedArray = []
|
||||
try:
|
||||
if isinstance(message.content, str):
|
||||
parsed_json_res = parse_markdown_json(message.content) # 优先识别带有markdown格式
|
||||
if parsed_json_res.get('success') is False: # 如果识别失败,再识别普通json格式(字符串)
|
||||
parsed_json_data = json.loads(message.content)
|
||||
parsed_json_res['success'] = True
|
||||
parsed_json_res['data'] = parsed_json_data
|
||||
except Exception as e:
|
||||
pass
|
||||
#logging.info(f"识别完成 {message.content} {parsed_json_data} ")
|
||||
|
||||
if parsed_json_res.get('success') is True:
|
||||
parsed_json_data = parsed_json_res.get('data')
|
||||
matchedAntiqueArray = parsed_json_data.get('antique').split(';') # 识别出的文物的数组,中间以';'分割,可能有多个
|
||||
if len(matchedAntiqueArray) ==1: # 只有一个匹配项,直接返回
|
||||
logging.info(f"识别完成 得到1个,{parsed_json_data.get('antique')} {labels_with_id} ")
|
||||
for item in labels_with_id:
|
||||
if item['label'] == parsed_json_data.get('antique'):
|
||||
parsed_json_data['id'] = item.get('id')
|
||||
else: # 有多个匹配项,需要进行多个匹配
|
||||
for label in matchedAntiqueArray:
|
||||
for label in matchedAntiqueArray[:5]:
|
||||
antique= {'label':label}
|
||||
for item in labels_with_id:
|
||||
if item['label'] == label:
|
||||
|
||||
@@ -115,6 +115,7 @@ def update(tenant_id, chat_id, session_id):
|
||||
@token_required
|
||||
def completion(tenant_id, chat_id): # chat_id 和 别的文件中的dialog_id 应该是一个意思? cyx 2025-01-25
|
||||
req = request.json
|
||||
logging.info(f"/chats/{chat_id}/completions--0 req={req}") # cyx
|
||||
if not req.get("session_id"): # session_id 和 别的文件中的conversation_id 应该是一个意思? cyx 2025-01-25
|
||||
conv = {
|
||||
"id": get_uuid(),
|
||||
|
||||
Reference in New Issue
Block a user