Files
ragflow_python/asr-monitor-test/app/wps_office_service.py

581 lines
20 KiB
Python
Raw Normal View History

# 新增的依赖项和工具函数
import asyncio
import hashlib
import hmac
import json
import logging
import time
from datetime import date, datetime
from datetime import time as dt_time
from decimal import Decimal
from typing import Any, Dict, Optional
from uuid import UUID

import requests
from fastapi import APIRouter, Depends, HTTPException, status, Request, Response, Query, Header
from fastapi.responses import StreamingResponse, JSONResponse
# WPS application configuration - replace with your actual values.
# NOTE(review): the app secret is committed in source; consider loading both
# values from the environment or a secret store instead.
WPS_APP_ID = "SX20251002WTFLCP"
WPS_APP_SECRET = "hoAGAXMTWXpkDxKFbTnSzjkckdFNNiSC"
class _EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes a few non-JSON-native types.

    Conversions performed by ``default``:
    - ``datetime`` / ``date`` / ``time``: ISO 8601 string via ``isoformat()``
    - ``Decimal``: ``float``
    - ``UUID``: ``str``
    - any object exposing ``__json__()``: whatever that method returns
    """

    def default(self, obj: Any) -> Any:
        """Return a JSON-serializable stand-in for ``obj``.

        Unsupported objects fall through to ``json.JSONEncoder.default``,
        which raises ``TypeError``.
        """
        # Date/time family: all three types expose isoformat().
        if isinstance(obj, (datetime, date, dt_time)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, UUID):
            return str(obj)
        # numpy scalars/arrays are not handled: that support was disabled in
        # the original code and numpy is not imported by this module.
        if hasattr(obj, "__json__"):
            return obj.__json__()
        return super().default(obj)


class CustomJSONResponse(JSONResponse):
    """JSON response that can serialize datetime, date, time, Decimal and UUID.

    The body is rendered compactly (no indentation, ``","``/``":"``
    separators), keeps non-ASCII characters unescaped and rejects NaN/Infinity
    (``allow_nan=False``).
    """

    def render(self, content: Any) -> bytes:
        """Serialize ``content`` to UTF-8 encoded JSON bytes."""
        return json.dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=None,
            separators=(",", ":"),
            cls=_EnhancedJSONEncoder,
        ).encode("utf-8")
def verify_wps_signature(
    authorization: str = Header(...),
    date: str = Header(...),
    content_md5: str = Header(...),
    content_type: str = Header(...),
    x_app_id: str = Header(..., alias="X-App-Id"),
    x_weboffice_token: str = Header(..., alias="X-WebOffice-Token"),
) -> Dict[str, Any]:
    """Verify the WPS-2 signature carried by a WPS callback request.

    Every parameter is declared with ``Header(...)``, so FastAPI fills them
    from the request headers when this function is used as a dependency.

    The expected signature is the SHA-1 hex digest of
    ``WPS_APP_SECRET + content_md5 + content_type + date`` and the
    ``Authorization`` header must have the form ``WPS-2:<app_id>:<signature>``.

    Returns:
        ``{"app_id": <app id from Authorization>, "token": <X-WebOffice-Token>}``.

    Raises:
        HTTPException: 401 when the app id, header format or signature is
            invalid, or when verification fails unexpectedly.
    """
    try:
        # The declared app id must be ours.
        if x_app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        # Authorization must look like "WPS-2:<app_id>:<signature>".
        if not authorization.startswith("WPS-2:"):
            raise HTTPException(status_code=401, detail="Invalid signature format")
        parts = authorization.split(":")
        if len(parts) != 3:
            raise HTTPException(status_code=401, detail="Invalid authorization header")
        _, app_id, signature = parts

        # The app id embedded in Authorization is returned to the caller, so
        # it must be validated as well instead of being trusted blindly.
        if app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        string_to_sign = WPS_APP_SECRET + content_md5 + content_type + date
        expected_signature = hashlib.sha1(string_to_sign.encode()).hexdigest()

        # Constant-time comparison to avoid leaking timing information.
        if not hmac.compare_digest(signature, expected_signature):
            raise HTTPException(status_code=401, detail="Signature verification failed")

        return {
            "app_id": app_id,
            "token": x_weboffice_token,
        }
    except HTTPException:
        raise
    except Exception as e:
        # Anything unexpected (e.g. non-ASCII input to compare_digest) is
        # logged and reported as an authentication failure.
        logging.error(f"WPS签名验证异常: {str(e)}")
        raise HTTPException(status_code=401, detail="Signature verification error") from e
def parse_user_token(token: str) -> Optional[Dict[str, Any]]:
    """Decode a user token and extract the user id and permissions.

    The token is decoded with ``jwt.decode`` using ``JWT_SECRET_KEY`` and
    ``ALGORITHM``.
    NOTE(review): ``jwt``, ``JWT_SECRET_KEY`` and ``ALGORITHM`` are not defined
    in the visible part of this module - confirm they are provided elsewhere,
    otherwise every call fails and returns ``None``.

    Returns:
        ``{"user_id": <"sub" claim>, "permissions": <"permissions" claim or []>}``
        on success, ``None`` when the token cannot be decoded for any reason.
    """
    try:
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        return {
            "user_id": payload.get("sub"),
            "permissions": payload.get("permissions", []),
        }
    except Exception as exc:
        # Still best-effort (callers get None), but no longer silent: record
        # why decoding failed so misconfiguration is visible in the logs.
        logging.warning("Failed to parse user token: %s: %s", type(exc).__name__, exc)
        return None
# WPS callback router: the WPS WebOffice callback endpoints below are
# registered on this router.
wps_router = APIRouter()
# Step one of the three-phase save: WebOffice negotiates parameters with the
# integrating service; currently only the digest algorithm is negotiated.
@wps_router.get("/v3/3rd/files/{file_id}/upload/prepare", response_class=CustomJSONResponse)
async def upload_prepare_v3(
    file_id: str,
):
    """Advertise the digest algorithms accepted for the upcoming upload.

    ``file_id`` comes from the path and is not used; the answer is always
    ``md5``.
    """
    negotiated = {"digest_types": ["md5"]}
    return {"code": 0, "data": negotiated, "message": ""}
@wps_router.post("/v3/3rd/files/{file_id}/upload/address", response_class=CustomJSONResponse)
async def upload_address_v3(
    file_id: str,
):
    """Return the HTTP method and URL to which the file content should be sent.

    The URL points at this service's own ``/v3/3rd/files/{file_id}/upload``
    route on a hard-coded public host.
    """
    upload_url = f"https://ragflow.szzysztech.com/apitest2/wps/v3/3rd/files/{file_id}/upload"
    target = {"method": "PUT", "url": upload_url}
    return {"code": 0, "data": target, "message": ""}
# Base URL of the backend API; the upload handler appends "/minio/put" to it.
BASE_API_URL= "http://1.13.185.116:9380/api/v1"
@wps_router.put("/v3/3rd/files/{file_id}/upload", response_class=CustomJSONResponse)
async def receive_upload_file_v3(
    file_id: str,
    request: Request
):
    """Receive the file body PUT by the WPS server and forward it to storage.

    The raw request body is re-posted as multipart form data to
    ``{BASE_API_URL}/minio/put``.

    Returns:
        ``{"code": 0, "message": ...}`` when the storage API answers 200.

    Raises:
        HTTPException: 400 when ``Content-Length`` is present but not an
            integer; 500 when the storage API cannot be reached or answers
            with a non-200 status.
    """
    # Validate Content-Length when supplied; the value itself is not needed.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            int(content_length)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid Content-Length header")

    # Raw file bytes sent by the WPS server.
    file_data = await request.body()

    # Only the demo files have a known target name; everything else is
    # uploaded with an empty file_name.
    demo_file_names = {
        "demo_file_001": "gv9014-bom.xlsx",
        "demo_file_002": "GCDS100900040001.xlsx",
    }
    file_name = demo_file_names.get(file_id, "")

    files = {
        'file': (file_id, file_data)  # file_id doubles as the multipart filename
    }
    data = {
        'bucket': 'wps-web-office-files',
        'file_name': file_name
    }
    # NOTE(review): hard-coded bearer token; should come from configuration.
    headers = {
        "Authorization": "Bearer ragflow-NhZTY5Y2M4YWQ1MzExZWY4Zjc3MDI0Mm"
    }
    minio_api_url = f"{BASE_API_URL}/minio/put"

    try:
        # requests is blocking: run it in a worker thread so the event loop
        # stays responsive, and bound the wait with a timeout.
        response = await asyncio.to_thread(
            requests.post,
            minio_api_url,
            files=files,
            data=data,
            headers=headers,
            timeout=60,
        )
    except Exception as e:
        logging.error(f"Error uploading file {file_id} to MINIO: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error during file upload: {str(e)}"
        ) from e

    if response.status_code == 200:
        logging.info(f"File {file_id} successfully uploaded to MINIO")
        return {
            "code": 0,
            "message": "File uploaded and processed successfully"
        }

    # Raised outside the try block so this error is not caught and re-wrapped
    # by the generic handler above.
    logging.error(f"MINIO upload failed for {file_id}: {response.status_code} - {response.text}")
    raise HTTPException(
        status_code=500,
        detail=f"Failed to upload file to storage: {response.text}"
    )
@wps_router.post("/v3/3rd/files/{file_id}/upload/complete", response_class=CustomJSONResponse)
async def upload_complete_v3(
    file_id: str,
):
    """Report the (hard-coded) file metadata once an upload has completed.

    Only the file name depends on ``file_id``; every other field is a fixed
    sample value.
    """
    file_name = "GV9014-BOM.xlsx" if file_id == "demo_file_001" else "GCDS100900040001.xlsx"
    metadata = {
        "create_time": 1670218748,
        "creator_id": "404",
        "id": "9",
        "modifier_id": "404",
        "modify_time": 1670328304,
        "name": file_name,
        "size": 18961,
        "version": 180,
    }
    return {"code": 0, "data": metadata}
@wps_router.get("/v3/3rd/files/{file_id}/permission", response_class=CustomJSONResponse)
async def get_file_id_permission_v3(
    file_id: str,
):
    """Return the fixed permission set for user "404" on any file.

    Every operation is allowed (1) except ``history`` and ``rename`` (0);
    ``file_id`` is not consulted.
    """
    operations = (
        "comment", "copy", "download", "history", "print",
        "read", "rename", "saveas", "update",
    )
    denied = ("history", "rename")
    permissions = {op: 0 if op in denied else 1 for op in operations}
    permissions["user_id"] = "404"
    return {"code": 0, "data": permissions}
#GET
#
@wps_router.get("/v3/3rd/users", response_class=CustomJSONResponse)
async def get_users_info_v3(
    user_ids: list[str] = Query(..., description="多个用户ID", alias="user_ids")
):
    """Return sample user info for a batch of user ids (debug version).

    No validation or lookup is performed: each distinct id yields
    ``{"id": <id>, "name": "用户<id>"}``. Duplicates are dropped and the
    order of first appearance in the request is preserved.
    """
    logging.info(f"批量获取用户信息调试: user_ids={user_ids}")

    # dict.fromkeys de-duplicates while keeping request order; a set would
    # make the response order vary between runs.
    unique_user_ids = list(dict.fromkeys(user_ids))

    users_info = [
        {
            "id": user_id,
            "name": f"用户{user_id}",
        }
        for user_id in unique_user_ids
    ]

    logging.info(f"返回用户信息: {len(users_info)}个用户")
    return {
        "code": 0,
        "data": users_info
    }
@wps_router.get("/v3/3rd/files/{file_id}", response_class=CustomJSONResponse)
async def get_file_info_v3(
    file_id: str,
):
    """Return basic file information (V3 callback).

    The returned ``id`` is taken from the looked-up record, which is expected
    to equal the requested ``file_id``.

    Returns:
        ``{"code": 0, "data": {...}}`` with id, name, version, size,
        create_time, modify_time, creator_id and modifier_id.

    Raises:
        HTTPException: 404 when ``get_file_by_id_v3`` finds nothing, 403 when
            ``check_file_permission_v3`` denies access, 500 when the response
            fails ``validate_file_info_response`` or on any unexpected error.
    """
    logging.info(f"获取文件信息 /v3/3rd/files/{file_id}")
    try:
        # NOTE(review): WPS signature verification and user-token parsing are
        # currently disabled, so this endpoint is unauthenticated and the
        # lookups below receive no user id.
        logging.info(f"获取文件信息: file_id={file_id}")

        file_info = get_file_by_id_v3(file_id, None)
        if not file_info:
            logging.warning(f"文件不存在: file_id={file_id}")
            raise HTTPException(status_code=404, detail="File not found")
        logging.info(f"获取文件信息 /v3/3rd/files/{file_info}")

        if not check_file_permission_v3(file_id, None):
            logging.warning(f"用户无权限访问文件: file_id={file_id}")
            raise HTTPException(status_code=403, detail="Insufficient permissions")

        response_data = {
            "id": file_info["id"],
            "name": file_info["name"],
            "version": file_info["version"],
            "size": file_info["size"],
            "create_time": file_info["create_time"],
            "modify_time": file_info["modify_time"],
            # Hard-coded creator; the record's own creator_id is ignored.
            "creator_id": "404",
            "modifier_id": file_info["modifier_id"]
        }

        validation_error = validate_file_info_response(response_data)
        if validation_error:
            logging.error(f"文件信息响应数据验证失败: {validation_error}")
            raise HTTPException(status_code=500, detail="Internal server error: invalid file data format")

        logging.info(f"成功获取文件信息: file_id={file_id}")
        return {
            "code": 0,
            "data": response_data
        }
    except HTTPException:
        raise
    except Exception as e:
        # logging.exception keeps the traceback for unexpected failures.
        logging.exception(f"获取文件信息异常: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
@wps_router.get("/v3/3rd/files/{file_id}/download", response_class=CustomJSONResponse)
async def get_file_download_url(
    file_id: str
):
    """Return the download URL for a file (debug version).

    Known ids map to fixed object URLs; any other id falls back to the
    ``GCDS100900040001.xlsx`` URL. The response carries only ``url``; the
    optional digest and headers fields are not sent.
    """
    logging.info(f"获取文件下载地址: file_id={file_id}")

    fallback_url = "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx"
    known_urls = {
        "demo_file_001": "http://1.13.185.116:9000/wps-web-office-files/gv9014-bom.xlsx",
        "demo_file_002": "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx",
        "hanjie_sop": "http://1.13.185.116:9000/wps-web-office-files/hanjie_sop.xls",
    }
    download_url = known_urls.get(file_id, fallback_url)

    logging.info(f"返回文件下载地址: {download_url}")
    return {"code": 0, "data": {"url": download_url}}
def get_file_by_id_v3(file_id: str, user_id: str) -> Optional[Dict[str, Any]]:
    """Look up file information by id (V3) - placeholder implementation.

    Only a fixed list of sample ids is recognised; for those a canned record
    is returned whose ``id`` echoes ``file_id``. ``user_id`` is not used.
    TODO: replace with a real database query that returns the same keys, with
    ``create_time`` / ``modify_time`` as epoch seconds.

    Returns:
        The file record dict, or ``None`` when the id is unknown or the lookup
        raises.
    """
    known_ids = ("example_file_123", "dale_123", "demo_file_001", "demo_file_002", "hanjie_sop")
    try:
        if file_id not in known_ids:
            # Unknown file.
            return None
        record = {"id": file_id, "name": "统计月报.xlsx"}
        record.update(version=201, size=18961)
        record.update(create_time=1670218748, modify_time=1759478858)  # epoch seconds
        record.update(creator_id="user_404", modifier_id="user_404")
        return record
    except Exception as exc:
        logging.error(f"查询文件信息失败: {str(exc)}")
        return None
def check_file_permission_v3(file_id: str, user_id: str) -> bool:
    """Check whether a user may access a file (V3) - placeholder implementation.

    Currently grants access to everyone; neither argument is inspected.
    TODO: replace with a real permission query for ``(file_id, user_id)``.

    Returns:
        ``True`` when access is granted, ``False`` if the check raises.
    """
    granted = False
    try:
        # Temporary stub: always allow.
        granted = True
    except Exception as exc:
        logging.error(f"检查文件权限失败: {str(exc)}")
    return granted
def validate_file_info_response(file_data: Dict[str, Any]) -> Optional[str]:
    """Validate a file-info payload before it is returned to WPS.

    Checks, in order: all required fields are present; ``id`` is a string of
    at most 47 characters; ``name`` is a string of at most 240 characters
    without any of ``\\ / | " : * ? < >``; ``version`` is an integer >= 1;
    ``size``, ``create_time`` and ``modify_time`` are integers >= 0.
    Booleans are not accepted as integers.

    Returns:
        ``None`` when the payload is valid, otherwise a message describing the
        first problem found.
    """

    def is_plain_int(value: Any) -> bool:
        # bool is a subclass of int; True/False are not valid numbers here.
        return isinstance(value, int) and not isinstance(value, bool)

    required_fields = (
        "id", "name", "version", "size",
        "create_time", "modify_time", "creator_id", "modifier_id",
    )
    for field in required_fields:
        if field not in file_data:
            return f"Missing required field: {field}"

    # Type checks come first so a non-string value yields an error message
    # instead of a TypeError from len() or the "in" test.
    file_id = file_data["id"]
    if not isinstance(file_id, str):
        return "File ID must be a string"
    if len(file_id) > 47:
        return "File ID exceeds maximum length of 47 characters"

    name = file_data["name"]
    if not isinstance(name, str):
        return "File name must be a string"
    if len(name) > 240:
        return "File name exceeds maximum length of 240 characters"
    for char in ('\\', '/', '|', '"', ':', '*', '?', '<', '>'):
        if char in name:
            return f"File name contains invalid character: {char}"

    if not is_plain_int(file_data["version"]) or file_data["version"] < 1:
        return "Version must be a positive integer"
    if not is_plain_int(file_data["size"]) or file_data["size"] < 0:
        return "Size must be a non-negative integer"
    if not is_plain_int(file_data["create_time"]) or file_data["create_time"] < 0:
        return "Create time must be a non-negative integer"
    if not is_plain_int(file_data["modify_time"]) or file_data["modify_time"] < 0:
        return "Modify time must be a non-negative integer"
    return None
def verify_wps_signature_direct(
    authorization: str,
    date: str,
    content_md5: str,
    content_type: str,
    x_app_id: str,
    x_weboffice_token: str
) -> Dict[str, Any]:
    """Verify a WPS-2 request signature from explicitly passed header values.

    The expected signature is the SHA-1 hex digest of
    ``WPS_APP_SECRET + content_md5 + content_type + date`` and
    ``authorization`` must have the form ``WPS-2:<app_id>:<signature>``.

    Returns:
        ``{"app_id": <app id from authorization>, "token": x_weboffice_token}``.

    Raises:
        HTTPException: 401 when the app id, header format or signature is
            invalid, or when verification fails unexpectedly.
    """
    try:
        # The declared app id must be ours.
        if x_app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        # authorization must look like "WPS-2:<app_id>:<signature>".
        if not authorization.startswith("WPS-2:"):
            raise HTTPException(status_code=401, detail="Invalid signature format")
        parts = authorization.split(":")
        if len(parts) != 3:
            raise HTTPException(status_code=401, detail="Invalid authorization header")
        _, app_id, signature = parts

        # The app id embedded in authorization is returned to the caller, so
        # it must be validated as well instead of being trusted blindly.
        if app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        string_to_sign = WPS_APP_SECRET + content_md5 + content_type + date
        expected_signature = hashlib.sha1(string_to_sign.encode()).hexdigest()

        # Constant-time comparison to avoid leaking timing information.
        if not hmac.compare_digest(signature, expected_signature):
            raise HTTPException(status_code=401, detail="Signature verification failed")

        return {
            "app_id": app_id,
            "token": x_weboffice_token
        }
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"WPS签名验证异常: {str(e)}")
        raise HTTPException(status_code=401, detail="Signature verification error") from e