Files
ragflow_python/asr-monitor-test/app/wps_office_service.py
qcloud 074747f902
Some checks failed
tests / ragflow_tests (push) Has been cancelled
106.51.72.204 上的gitea重新初始化,提交到远程
2025-10-09 16:55:45 +08:00

581 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 新增的依赖项和工具函数
import asyncio
import hashlib
import hmac
import json
import logging
import os
import time
from datetime import date, datetime, time as dt_time
from decimal import Decimal
from typing import Any, Dict, Optional
from uuid import UUID

import requests
from fastapi import APIRouter, Depends, HTTPException, status, Request, Response, Query, Header
from fastapi.responses import StreamingResponse, JSONResponse
# WPS application credentials.
# The values can be overridden through the WPS_APP_ID / WPS_APP_SECRET
# environment variables so that deployments do not have to edit this file.
# The literals below are only fallback defaults kept for backward compatibility.
# NOTE(review): a secret committed to source control should be rotated and the
# fallback removed once every deployment provides the environment variables.
WPS_APP_ID = os.environ.get("WPS_APP_ID", "SX20251002WTFLCP")
WPS_APP_SECRET = os.environ.get("WPS_APP_SECRET", "hoAGAXMTWXpkDxKFbTnSzjkckdFNNiSC")
class _EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises a few non-native Python types.

    Conversions performed by ``default``:
    - ``datetime`` / ``date`` / ``time``: ISO 8601 string via ``isoformat()``
    - ``Decimal``: ``float``
    - ``UUID``: ``str``
    - any object exposing ``__json__()``: the value returned by that method

    numpy types are not handled; numpy is not imported by this module.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Falls back to ``json.JSONEncoder.default`` (which raises ``TypeError``)
        for types that are not listed in the class docstring.
        """
        # All three classes expose isoformat(); datetime is a date subclass,
        # so a single check covers every date/time value.
        if isinstance(obj, (datetime, date, dt_time)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, UUID):
            return str(obj)
        # Hook for custom types that know how to serialise themselves.
        if hasattr(obj, "__json__"):
            return obj.__json__()
        return super().default(obj)


class CustomJSONResponse(JSONResponse):
    """JSON response that encodes its content with ``_EnhancedJSONEncoder``.

    Non-ASCII characters are emitted as-is (``ensure_ascii=False``) and the
    output is compact (no indentation, ``","`` / ``":"`` separators).
    """

    def render(self, content: Any) -> bytes:
        """Serialise *content* to UTF-8 encoded JSON bytes.

        Raises:
            ValueError: if *content* contains NaN/Infinity (``allow_nan=False``).
            TypeError: if *content* contains a type the encoder cannot handle.
        """
        return json.dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=None,
            separators=(",", ":"),
            cls=_EnhancedJSONEncoder,
        ).encode("utf-8")
def verify_wps_signature(
    authorization: str = Header(...),
    date: str = Header(...),
    content_md5: str = Header(...),
    content_type: str = Header(...),
    x_app_id: str = Header(..., alias="X-App-Id"),
    x_weboffice_token: str = Header(..., alias="X-WebOffice-Token"),
) -> Dict[str, Any]:
    """Verify the signature of a WPS request using values read from HTTP headers.

    Every parameter is declared with ``Header(...)``, so all six headers are
    required. The actual check is delegated to ``verify_wps_signature_direct``
    so that the signature algorithm lives in exactly one place and unexpected
    failures are logged there.

    Returns:
        ``{"app_id": <app id taken from the Authorization header>,
        "token": <X-WebOffice-Token value>}``.

    Raises:
        HTTPException: status 401 when the app id does not match, the
            Authorization header is malformed, or the signature is wrong.
    """
    return verify_wps_signature_direct(
        authorization,
        date,
        content_md5,
        content_type,
        x_app_id,
        x_weboffice_token,
    )
def parse_user_token(token: str) -> Optional[Dict[str, Any]]:
    """Decode a user token and return the user id and permissions it carries.

    The token is decoded with ``jwt.decode`` using ``JWT_SECRET_KEY`` and
    ``ALGORITHM``.

    NOTE(review): ``jwt``, ``JWT_SECRET_KEY`` and ``ALGORITHM`` are not defined
    in the visible part of this module. Unless they are provided elsewhere,
    every call fails with ``NameError`` and therefore returns ``None``. The
    failure is logged below so that this is no longer silent.

    Returns:
        ``{"user_id": <"sub" claim>, "permissions": <"permissions" claim or []>}``
        on success, ``None`` when the token cannot be decoded for any reason.
    """
    try:
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        return {
            "user_id": payload.get("sub"),
            "permissions": payload.get("permissions", []),
        }
    except Exception as exc:
        # Best-effort contract: callers treat None as "invalid token". The
        # token itself is deliberately not logged.
        logging.warning("Failed to parse user token: %s: %s", type(exc).__name__, exc)
        return None
# Router for the WPS callback endpoints; the handlers below register on it.
wps_router = APIRouter()
# Step one of the three-phase save: WebOffice and the integrator negotiate
# parameters; at the moment only the digest algorithm is negotiated.
@wps_router.get("/v3/3rd/files/{file_id}/upload/prepare", response_class=CustomJSONResponse)
async def upload_prepare_v3(
    file_id: str,
):
    """Answer the upload "prepare" step.

    The reply is the same for every ``file_id``: ``md5`` is the only digest
    type offered.
    """
    negotiated = {"digest_types": ["md5"]}
    return dict(code=0, data=negotiated, message="")
@wps_router.post("/v3/3rd/files/{file_id}/upload/address", response_class=CustomJSONResponse)
async def upload_address_v3(
    file_id: str,
):
    """Return the HTTP method and URL to which the file body should be uploaded.

    The URL is a fixed host/prefix with ``file_id`` substituted into the path;
    the method is always ``PUT``.
    """
    upload_url = f"https://ragflow.szzysztech.com/apitest2/wps/v3/3rd/files/{file_id}/upload"
    target = {"method": "PUT", "url": upload_url}
    return {"code": 0, "data": target, "message": ""}
# Base URL of the backend API; receive_upload_file_v3 appends "/minio/put" to it.
# NOTE(review): hard-coded plain-HTTP address — consider moving to configuration.
BASE_API_URL= "http://1.13.185.116:9380/api/v1"
@wps_router.put("/v3/3rd/files/{file_id}/upload", response_class=CustomJSONResponse)
async def receive_upload_file_v3(
    file_id: str,
    request: Request,
):
    """Receive a file body sent via PUT and forward it to the MinIO relay API.

    The raw request body is read in full and re-posted as multipart form data
    to ``{BASE_API_URL}/minio/put``.

    Returns:
        ``{"code": 0, "message": ...}`` when the relay answers with HTTP 200.

    Raises:
        HTTPException: 400 if the Content-Length header is present but not an
            integer; 500 if the relay cannot be reached or answers with a
            non-200 status.
    """
    # Reject a malformed Content-Length early. The value itself is not used.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            int(content_length)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid Content-Length header")

    file_data = await request.body()

    # Only the demo file ids map to a stored object name; any other id is
    # forwarded with an empty file_name.
    # NOTE(review): confirm how the relay treats an empty file_name.
    demo_file_names = {
        "demo_file_001": "gv9014-bom.xlsx",
        "demo_file_002": "GCDS100900040001.xlsx",
    }
    file_name = demo_file_names.get(file_id, "")

    files = {
        "file": (file_id, file_data),  # file_id doubles as the multipart filename
    }
    data = {
        "bucket": "wps-web-office-files",
        "file_name": file_name,
    }
    # NOTE(review): hard-coded bearer token — should come from configuration.
    headers = {
        "Authorization": "Bearer ragflow-NhZTY5Y2M4YWQ1MzExZWY4Zjc3MDI0Mm"
    }
    minio_api_url = f"{BASE_API_URL}/minio/put"

    try:
        # requests is blocking: run it in a worker thread so the event loop
        # keeps serving other requests, and bound the wait with a timeout.
        response = await asyncio.to_thread(
            requests.post,
            minio_api_url,
            files=files,
            data=data,
            headers=headers,
            timeout=60,
        )
    except Exception as e:
        logging.exception(f"Error uploading file {file_id} to MINIO: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error during file upload: {str(e)}"
        ) from e

    if response.status_code == 200:
        logging.info(f"File {file_id} successfully uploaded to MINIO")
        return {
            "code": 0,
            "message": "File uploaded and processed successfully"
        }

    # Raised outside the try block so that it is not caught and re-wrapped
    # with the "Internal server error during file upload" message.
    logging.error(f"MINIO upload failed for {file_id}: {response.status_code} - {response.text}")
    raise HTTPException(
        status_code=500,
        detail=f"Failed to upload file to storage: {response.text}"
    )
@wps_router.post("/v3/3rd/files/{file_id}/upload/complete", response_class=CustomJSONResponse)
async def upload_complete_v3(
    file_id: str,
):
    """Acknowledge the end of an upload and return the file's metadata.

    Apart from ``id`` and ``name`` the metadata is fixed sample data.
    ``id`` echoes the requested ``file_id``: get_file_info_v3 in this module
    states that the returned id must equal the id passed in, and the same
    rule is applied here instead of a hard-coded value.
    NOTE(review): confirm against the WPS callback specification.
    """
    # "demo_file_001" has its own display name; every other id uses the default.
    demo_file_names = {"demo_file_001": "GV9014-BOM.xlsx"}
    file_name = demo_file_names.get(file_id, "GCDS100900040001.xlsx")
    return {
        "code": 0,
        "data": {
            "create_time": 1670218748,
            "creator_id": "404",
            "id": file_id,
            "modifier_id": "404",
            "modify_time": 1670328304,
            "name": file_name,
            "size": 18961,
            "version": 180,
        },
    }
@wps_router.get("/v3/3rd/files/{file_id}/permission", response_class=CustomJSONResponse)
async def get_file_id_permission_v3(
    file_id: str,
):
    """Return the permission flags for a file.

    The answer does not depend on ``file_id``: every action is allowed (1)
    except ``history`` and ``rename`` (0), and the user id is always "404".
    """
    actions = (
        "comment",
        "copy",
        "download",
        "history",
        "print",
        "read",
        "rename",
        "saveas",
        "update",
    )
    denied = ("history", "rename")
    permissions = {action: 0 if action in denied else 1 for action in actions}
    permissions["user_id"] = "404"
    return {"code": 0, "data": permissions}
@wps_router.get("/v3/3rd/users", response_class=CustomJSONResponse)
async def get_users_info_v3(
    user_ids: list[str] = Query(..., description="多个用户ID", alias="user_ids")
):
    """Return information for several users at once (debug version).

    No validation is performed and no user store is queried: each requested
    id is answered with generated sample data. Duplicate ids are collapsed
    and the result keeps the order in which ids first appear in the request,
    so the response is deterministic.
    """
    logging.info(f"批量获取用户信息调试: user_ids={user_ids}")
    # dict.fromkeys de-duplicates while preserving first-seen order
    # (a set would make the response order arbitrary).
    unique_user_ids = list(dict.fromkeys(user_ids))
    users_info = [
        {
            "id": user_id,
            "name": f"用户{user_id}",
        }
        for user_id in unique_user_ids
    ]
    logging.info(f"返回用户信息: {len(users_info)}个用户")
    return {
        "code": 0,
        "data": users_info
    }
@wps_router.get("/v3/3rd/files/{file_id}", response_class=CustomJSONResponse)
async def get_file_info_v3(
    file_id: str,
):
    """Return basic file information (V3 interface).

    The ``id`` in the response must be identical to the requested ``file_id``.

    Raises:
        HTTPException: 404 when the file is unknown, 403 when access is
            denied, 500 when the assembled data fails validation or an
            unexpected error occurs.
    """
    logging.info(f"获取文件信息 /v3/3rd/files/{file_id}")
    try:
        # NOTE(review): signature verification and user-token checks are
        # currently disabled, so this endpoint is unauthenticated. The
        # intended flow was:
        #   signature_data = verify_wps_signature_direct(
        #       authorization, date, content_md5 or "", content_type or "",
        #       x_app_id, x_weboffice_token
        #   )
        #   user_info = parse_user_token(signature_data["token"])
        #   if not user_info:
        #       raise HTTPException(status_code=401, detail="Invalid user token")
        logging.info(f"获取文件信息: file_id={file_id}")

        # No user id is available while authentication is disabled.
        file_info = get_file_by_id_v3(file_id, None)
        if not file_info:
            logging.warning(f"文件不存在: file_id={file_id}")
            raise HTTPException(status_code=404, detail="File not found")
        logging.info(f"获取文件信息 /v3/3rd/files/{file_info}")

        if not check_file_permission_v3(file_id, None):
            logging.warning(f"用户无权限访问文件: file_id={file_id}")
            raise HTTPException(status_code=403, detail="Insufficient permissions")

        response_data = {
            "id": file_info["id"],  # must equal the requested file_id
            "name": file_info["name"],
            "version": file_info["version"],
            "size": file_info["size"],
            "create_time": file_info["create_time"],
            "modify_time": file_info["modify_time"],
            # Fixed value; file_info["creator_id"] is intentionally not used.
            "creator_id": "404",
            "modifier_id": file_info["modifier_id"],
        }

        validation_error = validate_file_info_response(response_data)
        if validation_error:
            logging.error(f"文件信息响应数据验证失败: {validation_error}")
            raise HTTPException(status_code=500, detail="Internal server error: invalid file data format")

        logging.info(f"成功获取文件信息: file_id={file_id}")
        return {
            "code": 0,
            "data": response_data
        }
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        # logging.exception records the traceback as well as the message.
        logging.exception(f"获取文件信息异常: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
@wps_router.get("/v3/3rd/files/{file_id}/download", response_class=CustomJSONResponse)
async def get_file_download_url(
    file_id: str
):
    """Return the download URL for a file (debug version).

    Known ids map to fixed URLs; any other id falls back to the
    GCDS100900040001.xlsx URL.
    """
    logging.info(f"获取文件下载地址: file_id={file_id}")

    # Sample URLs — replace with the real download location in production.
    fallback_url = "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx"
    known_urls = {
        "demo_file_001": "http://1.13.185.116:9000/wps-web-office-files/gv9014-bom.xlsx",
        "demo_file_002": "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx",
        "hanjie_sop": "http://1.13.185.116:9000/wps-web-office-files/hanjie_sop.xls",
    }
    download_url = known_urls.get(file_id, fallback_url)

    # Optional fields that are currently not sent:
    #   "digest" / "digest_type": file checksum and its algorithm (md5 or sha1)
    #   "headers": extra request headers for the download (e.g. Referer,
    #              Authorization) for hot-link protection scenarios
    payload = {"url": download_url}

    logging.info(f"返回文件下载地址: {download_url}")
    return {"code": 0, "data": payload}
def get_file_by_id_v3(file_id: str, user_id: str) -> Optional[Dict[str, Any]]:
    """Look up file metadata by id (V3) — placeholder implementation.

    Returns fixed sample metadata for a small set of known ids and ``None``
    for everything else. ``user_id`` is accepted but not used.

    A real implementation is expected to query the file store, return
    ``None`` for unknown files, and otherwise return a dict with the keys
    ``id`` (equal to the requested ``file_id``), ``name``, ``version``,
    ``size``, ``create_time`` / ``modify_time`` (epoch seconds),
    ``creator_id`` and ``modifier_id``.
    """
    known_ids = (
        "example_file_123",
        "dale_123",
        "demo_file_001",
        "demo_file_002",
        "hanjie_sop",
    )
    try:
        if file_id not in known_ids:
            # Unknown file.
            return None
        # Temporary sample data — replace with the real lookup.
        return dict(
            id=file_id,  # must equal the requested file_id
            name="统计月报.xlsx",
            version=201,
            size=18961,
            create_time=1670218748,  # epoch seconds
            modify_time=1759478858,  # epoch seconds
            creator_id="user_404",
            modifier_id="user_404",
        )
    except Exception as e:
        logging.error(f"查询文件信息失败: {str(e)}")
        return None
def check_file_permission_v3(file_id: str, user_id: str) -> bool:
    """Tell whether a user may access a file (V3) — placeholder implementation.

    Currently grants access unconditionally; neither argument is inspected.
    A real implementation is expected to look up the user's permission on
    the file and return whether read access is allowed.
    """
    try:
        # Temporary behaviour — replace with the real permission check.
        granted = True
    except Exception as e:
        logging.error(f"检查文件权限失败: {str(e)}")
        granted = False
    return granted
def _is_strict_int(value: Any) -> bool:
    """Return True for real integers only (bool is an int subclass and is rejected)."""
    return isinstance(value, int) and not isinstance(value, bool)


def validate_file_info_response(file_data: Dict[str, Any]) -> Optional[str]:
    """Validate a file-info payload before it is returned to WPS.

    Checks, in order: presence of all required fields; ``id`` is a string of
    at most 47 characters; ``name`` is a string of at most 240 characters
    without any of ``\\ / | " : * ? < >``; ``version`` is an integer >= 1;
    ``size``, ``create_time`` and ``modify_time`` are integers >= 0.

    Returns:
        ``None`` when the payload is valid, otherwise a message describing
        the first problem found. The function never raises for wrongly typed
        values; it reports them as validation errors instead.
    """
    required_fields = (
        "id",
        "name",
        "version",
        "size",
        "create_time",
        "modify_time",
        "creator_id",
        "modifier_id",
    )
    for field in required_fields:
        if field not in file_data:
            return f"Missing required field: {field}"

    file_id = file_data["id"]
    # Type check first so that len() cannot raise on a non-string id.
    if not isinstance(file_id, str):
        return "File ID must be a string"
    if len(file_id) > 47:
        return "File ID exceeds maximum length of 47 characters"

    name = file_data["name"]
    if not isinstance(name, str):
        return "File name must be a string"
    if len(name) > 240:
        return "File name exceeds maximum length of 240 characters"
    invalid_chars = ['\\', '/', '|', '"', ':', '*', '?', '<', '>']
    for char in invalid_chars:
        if char in name:
            return f"File name contains invalid character: {char}"

    version = file_data["version"]
    if not _is_strict_int(version) or version < 1:
        return "Version must be a positive integer"

    size = file_data["size"]
    if not _is_strict_int(size) or size < 0:
        return "Size must be a non-negative integer"

    create_time = file_data["create_time"]
    if not _is_strict_int(create_time) or create_time < 0:
        return "Create time must be a non-negative integer"

    modify_time = file_data["modify_time"]
    if not _is_strict_int(modify_time) or modify_time < 0:
        return "Modify time must be a non-negative integer"

    return None
def verify_wps_signature_direct(
    authorization: str,
    date: str,
    content_md5: str,
    content_type: str,
    x_app_id: str,
    x_weboffice_token: str
) -> Dict[str, Any]:
    """Verify a WPS request signature from explicitly passed header values.

    The expected signature is the hex SHA-1 digest of
    ``WPS_APP_SECRET + content_md5 + content_type + date``. The Authorization
    value must have the form ``WPS-2:<app id>:<signature>``.

    Returns:
        ``{"app_id": <app id from the Authorization value>,
        "token": x_weboffice_token}``.

    Raises:
        HTTPException: always status 401 — for a mismatching ``x_app_id``, a
            malformed Authorization value, a wrong signature, or any
            unexpected error during verification (which is also logged).
    """
    def reject(reason: str) -> HTTPException:
        # Every verification failure is reported as 401.
        return HTTPException(status_code=401, detail=reason)

    try:
        if x_app_id != WPS_APP_ID:
            raise reject("Invalid AppId")

        if not authorization.startswith("WPS-2:"):
            raise reject("Invalid signature format")
        fields = authorization.split(":")
        if len(fields) != 3:
            raise reject("Invalid authorization header")
        header_app_id, provided_signature = fields[1], fields[2]

        signing_material = "".join((WPS_APP_SECRET, content_md5, content_type, date))
        expected_signature = hashlib.sha1(signing_material.encode()).hexdigest()

        # Constant-time comparison.
        if hmac.compare_digest(provided_signature, expected_signature):
            return {
                "app_id": header_app_id,
                "token": x_weboffice_token,
            }
        raise reject("Signature verification failed")
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"WPS签名验证异常: {str(e)}")
        raise HTTPException(status_code=401, detail="Signature verification error")