"""WPS WebOffice callback endpoints plus the dependencies and helpers they use.

The module exposes ``wps_router`` (a FastAPI ``APIRouter``) with the v3
third-party callbacks visible in this file: three-phase save
(prepare / address / upload / complete), file info, permissions, user info
and download address.  Most handlers return hard-coded demo data and are
marked as such.
"""

import asyncio
import datetime as dt
import hashlib
import hmac
import json
import logging
import os
import time
from decimal import Decimal
from typing import Any, Dict, Optional
from uuid import UUID

import requests
from fastapi import (
    APIRouter,
    Depends,
    Header,
    HTTPException,
    Query,
    Request,
    Response,
    status,
)
from fastapi.responses import JSONResponse, StreamingResponse

# WPS application configuration.  The literals are the values this module was
# shipped with and stay as defaults; an environment variable of the given name
# overrides them.
# NOTE(review): credentials committed to source control should be rotated and
# supplied through the environment only.
WPS_APP_ID = os.environ.get("WPS_APP_ID", "SX20251002WTFLCP")
WPS_APP_SECRET = os.environ.get("WPS_APP_SECRET", "hoAGAXMTWXpkDxKFbTnSzjkckdFNNiSC")

# Base URL of the API that relays uploads to MinIO.
BASE_API_URL = "http://1.13.185.116:9380/api/v1"

# Bearer token sent to the MinIO relay API (same override rule as above).
_MINIO_API_TOKEN = os.environ.get(
    "WPS_MINIO_API_TOKEN", "ragflow-NhZTY5Y2M4YWQ1MzExZWY4Zjc3MDI0Mm"
)
# Bucket that receives saved documents.
_MINIO_BUCKET = "wps-web-office-files"
# (connect, read) timeouts in seconds so a stalled relay cannot hang a request.
_MINIO_TIMEOUT = (10, 120)

# Public URL that is handed back as the PUT target for uploads.
_UPLOAD_URL_TEMPLATE = (
    "https://ragflow.szzysztech.com/apitest2/wps/v3/3rd/files/{file_id}/upload"
)
# Location the demo documents are downloaded from.
_DOWNLOAD_BASE_URL = "http://1.13.185.116:9000/wps-web-office-files"

# Demo mapping: file id -> object name used when *uploading*.
# NOTE(review): "hanjie_sop" is downloadable but has no upload name, so its
# upload is sent with an empty file_name -- confirm whether that is intended.
_UPLOAD_OBJECT_NAMES = {
    "demo_file_001": "gv9014-bom.xlsx",
    "demo_file_002": "GCDS100900040001.xlsx",
}
# Demo mapping: file id -> object name used when *downloading*.
_DOWNLOAD_OBJECT_NAMES = {
    "demo_file_001": "gv9014-bom.xlsx",
    "demo_file_002": "GCDS100900040001.xlsx",
    "hanjie_sop": "hanjie_sop.xls",
}
_DEFAULT_DOWNLOAD_OBJECT_NAME = "GCDS100900040001.xlsx"

# File ids the placeholder lookup in get_file_by_id_v3 treats as existing.
_KNOWN_FILE_IDS = (
    "example_file_123",
    "dale_123",
    "demo_file_001",
    "demo_file_002",
    "hanjie_sop",
)


class _EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes a few non-JSON-native types.

    - ``datetime`` / ``date`` / ``time``: ISO 8601 string via ``isoformat()``
    - ``Decimal``: ``float`` (precision may be lost)
    - ``UUID``: string
    - any object exposing ``__json__()``: the value that method returns
    """

    def default(self, obj):
        if isinstance(obj, (dt.datetime, dt.date, dt.time)):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, UUID):
            return str(obj)
        # Hook for custom types that know how to serialize themselves.
        if hasattr(obj, "__json__"):
            return obj.__json__()
        # Fall back to the base class, which raises TypeError.
        return super().default(obj)


class CustomJSONResponse(JSONResponse):
    """JSON response that serializes dates, times, ``Decimal`` and ``UUID``."""

    def render(self, content: Any) -> bytes:
        """Encode *content* as compact UTF-8 JSON.

        Non-ASCII characters are emitted verbatim (``ensure_ascii=False``).
        ``allow_nan=False`` makes NaN/Infinity raise ``ValueError`` instead of
        producing invalid JSON.
        """
        return json.dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=None,
            separators=(",", ":"),
            cls=_EnhancedJSONEncoder,
        ).encode("utf-8")


def verify_wps_signature(
    authorization: str = Header(...),
    date: str = Header(...),
    content_md5: str = Header(...),
    content_type: str = Header(...),
    x_app_id: str = Header(..., alias="X-App-Id"),
    x_weboffice_token: str = Header(..., alias="X-WebOffice-Token"),
) -> Dict[str, Any]:
    """Verify the WPS request signature from request headers.

    Every parameter is declared with ``Header(...)``, so the function can be
    used as a FastAPI dependency.  The check itself is delegated to
    ``verify_wps_signature_direct`` so both entry points stay in sync.

    Returns:
        ``{"app_id": ..., "token": ...}`` on success.

    Raises:
        HTTPException: 401 when the app id, header format or signature is
            invalid, or when verification fails unexpectedly.
    """
    return verify_wps_signature_direct(
        authorization,
        date,
        content_md5,
        content_type,
        x_app_id,
        x_weboffice_token,
    )


def parse_user_token(token: str) -> Optional[Dict[str, Any]]:
    """Decode a user token into ``user_id`` and ``permissions``.

    Returns:
        A dict with ``user_id`` (the ``sub`` claim) and ``permissions``
        (defaults to ``[]``), or ``None`` when decoding fails for any reason.
    """
    try:
        # NOTE(review): `jwt`, `JWT_SECRET_KEY` and `ALGORITHM` are not defined
        # in the visible source.  Unless they are provided elsewhere, this
        # raises NameError and the function always returns None.
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        return {
            "user_id": payload.get("sub"),
            "permissions": payload.get("permissions", []),
        }
    except Exception as exc:
        # Best effort by design: callers treat None as "invalid token".  Log
        # the cause (never the token itself) so failures are diagnosable.
        logging.warning("Failed to parse user token: %s", exc)
        return None


# Router for the WPS callbacks.
wps_router = APIRouter()


# First step of the three-phase save: WebOffice and the integrator negotiate
# parameters, currently only the digest algorithm.
@wps_router.get(
    "/v3/3rd/files/{file_id}/upload/prepare", response_class=CustomJSONResponse
)
async def upload_prepare_v3(
    file_id: str,
):
    """Report the supported digest algorithms (only ``md5``)."""
    return {
        "code": 0,
        "data": {
            "digest_types": ["md5"],
        },
        "message": "",
    }


@wps_router.post(
    "/v3/3rd/files/{file_id}/upload/address", response_class=CustomJSONResponse
)
async def upload_address_v3(
    file_id: str,
):
    """Return the HTTP method and URL the file content should be sent to."""
    return {
        "code": 0,
        "data": {
            "method": "PUT",
            "url": _UPLOAD_URL_TEMPLATE.format(file_id=file_id),
        },
        "message": "",
    }


@wps_router.put("/v3/3rd/files/{file_id}/upload", response_class=CustomJSONResponse)
async def receive_upload_file_v3(
    file_id: str,
    request: Request,
):
    """Receive the file body sent via PUT and relay it to the MinIO API.

    The raw request body is forwarded as a multipart upload to
    ``{BASE_API_URL}/minio/put``.

    Raises:
        HTTPException: 400 when ``Content-Length`` is present but not an
            integer; 500 when the relay request fails or does not answer 200.
    """
    # Validate Content-Length when supplied.  The value itself is not needed:
    # the body is read in full below.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            int(content_length)
        except ValueError:
            raise HTTPException(
                status_code=400, detail="Invalid Content-Length header"
            ) from None

    # Raw file bytes from the request body.
    file_data = await request.body()

    # Demo mapping; unknown ids are uploaded with an empty file_name.
    file_name = _UPLOAD_OBJECT_NAMES.get(file_id, "")

    files = {
        "file": (file_id, file_data),  # file_id doubles as the multipart filename
    }
    data = {
        "bucket": _MINIO_BUCKET,
        "file_name": file_name,
    }
    headers = {
        "Authorization": f"Bearer {_MINIO_API_TOKEN}",
    }
    minio_api_url = f"{BASE_API_URL}/minio/put"

    try:
        # requests is blocking; run it in a worker thread so the event loop
        # keeps serving other requests while the upload is in flight.
        response = await asyncio.to_thread(
            requests.post,
            minio_api_url,
            files=files,
            data=data,
            headers=headers,
            timeout=_MINIO_TIMEOUT,
        )
    except Exception as e:
        logging.error(f"Error uploading file {file_id} to MINIO: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error during file upload: {str(e)}",
        ) from e

    # The status check lives outside the try block so the HTTPException below
    # is not caught and re-wrapped by the generic handler above.
    if response.status_code != 200:
        logging.error(
            f"MINIO upload failed for {file_id}: {response.status_code} - {response.text}"
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file to storage: {response.text}",
        )

    logging.info(f"File {file_id} successfully uploaded to MINIO")
    return {
        "code": 0,
        "message": "File uploaded and processed successfully",
    }


@wps_router.post(
    "/v3/3rd/files/{file_id}/upload/complete", response_class=CustomJSONResponse
)
async def upload_complete_v3(
    file_id: str,
):
    """Return file metadata after an upload (hard-coded demo values)."""
    file_name = "GCDS100900040001.xlsx"
    if file_id == "demo_file_001":
        file_name = "GV9014-BOM.xlsx"
    return {
        "code": 0,
        "data": {
            "create_time": 1670218748,
            "creator_id": "404",
            "id": "9",
            "modifier_id": "404",
            "modify_time": 1670328304,
            "name": file_name,
            "size": 18961,
            "version": 180,
        },
    }


@wps_router.get(
    "/v3/3rd/files/{file_id}/permission", response_class=CustomJSONResponse
)
async def get_file_id_permission_v3(
    file_id: str,
):
    """Return the permission flags for a file (hard-coded demo values)."""
    return {
        "code": 0,
        "data": {
            "comment": 1,
            "copy": 1,
            "download": 1,
            "history": 0,
            "print": 1,
            "read": 1,
            "rename": 0,
            "saveas": 1,
            "update": 1,
            "user_id": "404",
        },
    }


@wps_router.get("/v3/3rd/users", response_class=CustomJSONResponse)
async def get_users_info_v3(
    user_ids: list[str] = Query(..., description="多个用户ID", alias="user_ids"),
):
    """Return user info for several ids (debug version).

    Produces generated sample data without any validation.  Duplicate ids are
    collapsed and the first-seen order of the request is kept, so the response
    is deterministic.
    """
    logging.info(f"批量获取用户信息调试: user_ids={user_ids}")

    # dict.fromkeys de-duplicates while preserving order (a set would not).
    unique_user_ids = list(dict.fromkeys(user_ids))

    users_info = [
        {
            "id": user_id,
            "name": f"用户{user_id}",
        }
        for user_id in unique_user_ids
    ]

    logging.info(f"返回用户信息: {len(users_info)}个用户")

    return {
        "code": 0,
        "data": users_info,
    }


@wps_router.get("/v3/3rd/files/{file_id}", response_class=CustomJSONResponse)
async def get_file_info_v3(
    file_id: str,
):
    """Return basic file information (v3 interface).

    The ``id`` in the response is taken from the looked-up record, which must
    equal the requested ``file_id``.

    Raises:
        HTTPException: 404 when the file is unknown, 403 when access is
            denied, 500 when the response fails validation or an unexpected
            error occurs.
    """
    logging.info(f"获取文件信息 /v3/3rd/files/{file_id}")
    try:
        # TODO(review): signature verification (verify_wps_signature_direct)
        # and token parsing (parse_user_token) are disabled here; this handler
        # does not receive the headers they need, so no user id is available.
        logging.info(f"获取文件信息: file_id={file_id}")

        # Placeholder lookup -- replace with the real database query.
        file_info = get_file_by_id_v3(file_id, None)
        if not file_info:
            logging.warning(f"文件不存在: file_id={file_id}")
            raise HTTPException(status_code=404, detail="File not found")
        logging.info(f"获取文件信息 /v3/3rd/files/{file_info}")

        # No user id is available while verification is disabled, hence None.
        if not check_file_permission_v3(file_id, None):
            logging.warning(f"用户无权限访问文件: file_id={file_id}")
            raise HTTPException(status_code=403, detail="Insufficient permissions")

        response_data = {
            "id": file_info["id"],  # must equal the requested file_id
            "name": file_info["name"],
            "version": file_info["version"],
            "size": file_info["size"],
            "create_time": file_info["create_time"],
            "modify_time": file_info["modify_time"],
            # Hard-coded; the record's own creator_id is deliberately not used.
            "creator_id": "404",
            "modifier_id": file_info["modifier_id"],
        }

        validation_error = validate_file_info_response(response_data)
        if validation_error:
            logging.error(f"文件信息响应数据验证失败: {validation_error}")
            raise HTTPException(
                status_code=500,
                detail="Internal server error: invalid file data format",
            )

        logging.info(f"成功获取文件信息: file_id={file_id}")

        return {
            "code": 0,
            "data": response_data,
        }

    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"获取文件信息异常: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error") from e


@wps_router.get("/v3/3rd/files/{file_id}/download", response_class=CustomJSONResponse)
async def get_file_download_url(
    file_id: str,
):
    """Return the download URL for a file (debug version).

    Known demo ids map to their own object; every other id falls back to the
    default demo document.
    """
    logging.info(f"获取文件下载地址: file_id={file_id}")

    # Sample URLs -- replace with the real download location in production.
    object_name = _DOWNLOAD_OBJECT_NAMES.get(file_id, _DEFAULT_DOWNLOAD_OBJECT_NAME)
    download_url = f"{_DOWNLOAD_BASE_URL}/{object_name}"

    # Only "url" is returned; the original author noted "digest"/"digest_type"
    # (file checksum) and "headers" (extra request headers, e.g. for hotlink
    # protection) as optional additions.
    response_data = {
        "url": download_url,
    }

    logging.info(f"返回文件下载地址: {download_url}")

    return {
        "code": 0,
        "data": response_data,
    }


def get_file_by_id_v3(file_id: str, user_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Look up file information by id (v3) -- placeholder implementation.

    Intended to be replaced by a real database query.  For now a fixed record
    is returned for a small set of known ids; ``user_id`` is unused.

    Returns:
        A dict with ``id``, ``name``, ``version``, ``size``, ``create_time``,
        ``modify_time`` (both epoch seconds), ``creator_id`` and
        ``modifier_id``; ``None`` when the id is unknown or the lookup fails.
    """
    try:
        if file_id not in _KNOWN_FILE_IDS:
            # File does not exist.
            return None
        # Temporary sample data -- replace with the real implementation.
        return {
            "id": file_id,  # must equal the requested file_id
            "name": "统计月报.xlsx",
            "version": 201,
            "size": 18961,
            "create_time": 1670218748,  # epoch seconds
            "modify_time": 1759478858,  # epoch seconds
            "creator_id": "user_404",
            "modifier_id": "user_404",
        }
    except Exception as e:
        logging.error(f"查询文件信息失败: {str(e)}")
        return None


def check_file_permission_v3(file_id: str, user_id: Optional[str]) -> bool:
    """Check whether a user may access a file (v3) -- placeholder.

    Intended to be replaced by a real permission lookup.  Currently always
    grants access; a failure inside the check is logged and reported as
    "no access".
    """
    try:
        # Temporary stub -- replace with the real permission check.
        return True
    except Exception as e:
        logging.error(f"检查文件权限失败: {str(e)}")
        return False


def _is_plain_int(value: Any) -> bool:
    """Return True for real integers, excluding ``bool`` (an ``int`` subclass)."""
    return isinstance(value, int) and not isinstance(value, bool)


def validate_file_info_response(file_data: Dict[str, Any]) -> Optional[str]:
    """Validate a file-info response dict.

    Checks, in order: required keys, id (string, at most 47 characters),
    name (string, at most 240 characters, none of ``\\ / | " : * ? < >``),
    version (integer >= 1), size and both timestamps (integers >= 0).
    Booleans are rejected wherever an integer is required.

    Returns:
        ``None`` when the data is valid, otherwise a message describing the
        first problem found.
    """
    required_fields = [
        "id",
        "name",
        "version",
        "size",
        "create_time",
        "modify_time",
        "creator_id",
        "modifier_id",
    ]
    for field in required_fields:
        if field not in file_data:
            return f"Missing required field: {field}"

    # File id: type first, so len() cannot raise on a non-string value.
    if not isinstance(file_data["id"], str):
        return "File ID must be a string"
    if len(file_data["id"]) > 47:
        return "File ID exceeds maximum length of 47 characters"

    # File name: type, length, then forbidden characters.
    if not isinstance(file_data["name"], str):
        return "File name must be a string"
    if len(file_data["name"]) > 240:
        return "File name exceeds maximum length of 240 characters"

    invalid_chars = ['\\', '/', '|', '"', ':', '*', '?', '<', '>']
    for char in invalid_chars:
        if char in file_data["name"]:
            return f"File name contains invalid character: {char}"

    if not _is_plain_int(file_data["version"]) or file_data["version"] < 1:
        return "Version must be a positive integer"

    if not _is_plain_int(file_data["size"]) or file_data["size"] < 0:
        return "Size must be a non-negative integer"

    if not _is_plain_int(file_data["create_time"]) or file_data["create_time"] < 0:
        return "Create time must be a non-negative integer"

    if not _is_plain_int(file_data["modify_time"]) or file_data["modify_time"] < 0:
        return "Modify time must be a non-negative integer"

    return None


def verify_wps_signature_direct(
    authorization: str,
    date: str,
    content_md5: str,
    content_type: str,
    x_app_id: str,
    x_weboffice_token: str,
) -> Dict[str, Any]:
    """Verify a WPS request signature from explicit values.

    The ``authorization`` value must look like ``WPS-2:<app_id>:<signature>``.
    The expected signature is the SHA-1 hex digest of
    ``WPS_APP_SECRET + content_md5 + content_type + date``.

    Returns:
        ``{"app_id": <app id from the header>, "token": x_weboffice_token}``.

    Raises:
        HTTPException: 401 when ``x_app_id`` differs from ``WPS_APP_ID``, the
            header is malformed, the signature does not match, or any
            unexpected error occurs during verification.
    """
    try:
        if x_app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        if not authorization.startswith("WPS-2:"):
            raise HTTPException(status_code=401, detail="Invalid signature format")

        parts = authorization.split(":")
        if len(parts) != 3:
            raise HTTPException(status_code=401, detail="Invalid authorization header")

        _, app_id, signature = parts

        string_to_sign = WPS_APP_SECRET + content_md5 + content_type + date
        expected_signature = hashlib.sha1(string_to_sign.encode()).hexdigest()

        # compare_digest runs in constant time to avoid leaking how much of
        # the signature matched.
        if not hmac.compare_digest(signature, expected_signature):
            raise HTTPException(status_code=401, detail="Signature verification failed")

        return {
            "app_id": app_id,
            "token": x_weboffice_token,
        }

    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"WPS签名验证异常: {str(e)}")
        raise HTTPException(
            status_code=401, detail="Signature verification error"
        ) from e