# ragflow_python/asr-monitor-test/app/wps_office_service.py

# Newly added dependencies and utility functions
import asyncio
import hashlib
import hmac
import os
import time,logging,json,requests
from datetime import date, datetime
from datetime import time as dt_time
from decimal import Decimal
from typing import Dict, Any, Optional
from uuid import UUID

from fastapi import APIRouter, Depends, HTTPException, status, Request, Response,Query,Header
from fastapi.responses import StreamingResponse, JSONResponse

# WPS application credentials.
# Each value can be supplied through the environment variable of the same name;
# the literals below are only fallbacks and should be replaced with (or
# overridden by) your real configuration.
# NOTE(review): a secret committed to source control should be rotated, and the
# fallback removed once the deployment provides it through the environment.
WPS_APP_ID = os.environ.get("WPS_APP_ID") or "SX20251002WTFLCP"
WPS_APP_SECRET = os.environ.get("WPS_APP_SECRET") or "hoAGAXMTWXpkDxKFbTnSzjkckdFNNiSC"

class _EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes a few non-native types used in responses.

    Conversions performed by ``default``:
    - ``datetime`` / ``date`` / ``time``: ISO 8601 string via ``isoformat()``
    - ``Decimal``: ``float`` (precision may be lost)
    - ``UUID``: string
    - any object exposing ``__json__()``: the value that method returns

    Every other type falls through to ``json.JSONEncoder.default``, which
    raises ``TypeError``.
    """

    def default(self, obj):
        # ``datetime`` is a subclass of ``date``, so this single check covers
        # datetime, date and time values.
        if isinstance(obj, (date, dt_time)):
            return obj.isoformat()

        if isinstance(obj, Decimal):
            return float(obj)

        if isinstance(obj, UUID):
            return str(obj)

        # numpy scalars/arrays are intentionally not handled: numpy is not a
        # dependency of this module.

        # Custom types can opt in by providing a ``__json__`` method.
        if hasattr(obj, '__json__'):
            return obj.__json__()

        return super().default(obj)


class CustomJSONResponse(JSONResponse):
    """JSON response that can serialize special value types.

    In addition to the natively supported JSON types, the payload may contain
    ``datetime``/``date``/``time`` (ISO 8601 string), ``Decimal`` (float),
    ``UUID`` (string) and objects providing ``__json__()``.
    """

    def render(self, content: Any) -> bytes:
        """Serialize ``content`` to compact UTF-8 encoded JSON.

        Non-ASCII characters are emitted as-is (``ensure_ascii=False``) and
        NaN/Infinity are rejected (``allow_nan=False`` makes ``json.dumps``
        raise ``ValueError``).
        """
        return json.dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=None,
            separators=(",", ":"),
            cls=_EnhancedJSONEncoder,
        ).encode("utf-8")


def verify_wps_signature(
        authorization: str = Header(...),
        date: str = Header(...),
        content_md5: str = Header(...),
        content_type: str = Header(...),
        x_app_id: str = Header(..., alias="X-App-Id"),
        x_weboffice_token: str = Header(..., alias="X-WebOffice-Token")
) -> Dict[str, Any]:
    """Verify the WPS request signature using values read from request headers.

    This is the header-bound variant (every parameter is declared with
    ``Header``). The actual checks live in ``verify_wps_signature_direct`` so
    that both entry points always apply the same rules.

    Returns:
        A dict with ``app_id`` (taken from the Authorization header) and
        ``token`` (the ``X-WebOffice-Token`` header value).

    Raises:
        HTTPException: 401 when the app id, the header format or the
            signature is not accepted.
    """
    return verify_wps_signature_direct(
        authorization,
        date,
        content_md5,
        content_type,
        x_app_id,
        x_weboffice_token,
    )


def parse_user_token(token: str) -> Optional[Dict[str, Any]]:
    """Decode the user token and extract the caller's identity.

    The token is decoded with ``jwt.decode`` using ``JWT_SECRET_KEY`` and
    ``ALGORITHM``; adapt this to your own business logic if tokens are not JWTs.

    NOTE(review): ``jwt``, ``JWT_SECRET_KEY`` and ``ALGORITHM`` are not defined
    in the visible part of this module - confirm they are provided elsewhere.
    If they are not, every call fails and returns ``None``; the warning logged
    below makes that visible.

    Returns:
        ``{"user_id": <"sub" claim>, "permissions": <"permissions" claim or []>}``
        on success, ``None`` when the token is empty or cannot be decoded.
    """
    if not token:
        return None

    try:
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        return {
            "user_id": payload.get("sub"),
            "permissions": payload.get("permissions", [])
        }
    except Exception as exc:
        # Best effort by design: callers treat None as "invalid token". Log
        # the reason so misconfiguration is distinguishable from a bad token.
        logging.warning("parse_user_token failed: %s: %s", type(exc).__name__, exc)
        return None


# Router that groups the WPS callback endpoints declared below
wps_router = APIRouter()

# First step of the three-phase save: WebOffice and the integrating side
# negotiate parameters; at present only the digest algorithm is negotiated.
@wps_router.get("/v3/3rd/files/{file_id}/upload/prepare", response_class=CustomJSONResponse)
async def upload_prepare_v3(
        file_id: str,
):
    """Report the digest algorithms accepted for uploads (only ``md5``)."""
    supported_digests = ["md5"]
    negotiation = {"digest_types": supported_digests}
    return {"code": 0, "data": negotiation, "message": ""}

@wps_router.post("/v3/3rd/files/{file_id}/upload/address", response_class=CustomJSONResponse)
async def upload_address_v3(
        file_id: str,
):
    """Return the HTTP method and URL to which the content of ``file_id`` is uploaded."""
    upload_target = {
        "method": "PUT",
        "url": f"https://ragflow.szzysztech.com/apitest2/wps/v3/3rd/files/{file_id}/upload",
    }
    return {"code": 0, "data": upload_target, "message": ""}

# Base URL of the backend API; the upload handler posts to "<base>/minio/put".
BASE_API_URL = "http://1.13.185.116:9380/api/v1"

@wps_router.put("/v3/3rd/files/{file_id}/upload", response_class=CustomJSONResponse)
async def receive_upload_file_v3(
        file_id: str,
        request: Request
):
    """Receive the file stream PUT by the WPS server and relay it to storage.

    After the upload-address endpoint returns its URL, the WPS server PUTs the
    file entity in the request body to this endpoint. The body is forwarded as
    a multipart upload to the MinIO relay API at ``{BASE_API_URL}/minio/put``.

    Returns:
        ``{"code": 0, "message": ...}`` when the relay API answers with 200.

    Raises:
        HTTPException: 400 when ``Content-Length`` is present but not an
            integer; 500 when the relay request fails or answers with a
            status other than 200.
    """
    # Only validate the header; this is the place to add a size limit later.
    content_length = request.headers.get("content-length")
    if content_length:
        try:
            int(content_length)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid Content-Length header")

    # Raw file bytes sent in the request body.
    file_data = await request.body()

    # Object names for the known demo files; any other id is sent with an
    # empty file name.
    # NOTE(review): confirm how the relay API treats an empty file_name.
    known_file_names = {
        "demo_file_001": "gv9014-bom.xlsx",
        "demo_file_002": "GCDS100900040001.xlsx",
    }
    file_name = known_file_names.get(file_id, "")

    files = {
        'file': (file_id, file_data)  # file_id is used as the multipart filename
    }
    data = {
        'bucket': 'wps-web-office-files',  # replace with the actual bucket name
        'file_name': file_name
    }
    # NOTE(review): hard-coded bearer token - move it to configuration.
    headers = {
        "Authorization": "Bearer ragflow-NhZTY5Y2M4YWQ1MzExZWY4Zjc3MDI0Mm"
    }
    minio_api_url = f"{BASE_API_URL}/minio/put"
    relay_timeout = (10, 120)  # (connect, read) seconds, so a stalled relay cannot hang forever

    try:
        # requests is blocking; run it in a worker thread so the event loop
        # keeps serving other requests while the upload is in flight.
        response = await asyncio.to_thread(
            requests.post,
            minio_api_url,
            files=files,
            data=data,
            headers=headers,
            timeout=relay_timeout,
        )
    except Exception as e:
        logging.error(f"Error uploading file {file_id} to MINIO: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error during file upload: {str(e)}"
        ) from e

    # Checked outside the try block so this HTTPException reaches the client
    # unchanged instead of being re-wrapped by the generic handler above.
    if response.status_code != 200:
        logging.error(f"MINIO upload failed for {file_id}: {response.status_code} - {response.text}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file to storage: {response.text}"
        )

    logging.info(f"File {file_id} successfully uploaded to MINIO")
    return {
        "code": 0,
        "message": "File uploaded and processed successfully"
    }

@wps_router.post("/v3/3rd/files/{file_id}/upload/complete", response_class=CustomJSONResponse)
async def upload_complete_v3(
        file_id: str,
):
    """Return the file metadata for a completed upload (static demo values).

    Only the reported name depends on ``file_id``; every other field is fixed.
    """
    file_name = "GV9014-BOM.xlsx" if file_id == "demo_file_001" else "GCDS100900040001.xlsx"
    metadata = {
        "create_time": 1670218748,
        "creator_id": "404",
        "id": "9",
        "modifier_id": "404",
        "modify_time": 1670328304,
        "name": file_name,
        "size": 18961,
        "version": 180,
    }
    return {"code": 0, "data": metadata}


@wps_router.get("/v3/3rd/files/{file_id}/permission", response_class=CustomJSONResponse)
async def get_file_id_permission_v3(
        file_id: str,
):
    """Return the permission flags for the file (static values, same for every ``file_id``).

    Flags are 1 (granted) or 0 (denied); ``history`` and ``rename`` are denied.
    """
    permissions = dict(
        comment=1,
        copy=1,
        download=1,
        history=0,
        print=1,
        read=1,
        rename=0,
        saveas=1,
        update=1,
        user_id="404",
    )
    return {"code": 0, "data": permissions}
#GET
#
@wps_router.get("/v3/3rd/users", response_class=CustomJSONResponse)
async def get_users_info_v3(
        user_ids: list[str] = Query(..., description="多个用户ID", alias="user_ids")
):
    """Return information for several users at once (debug version).

    Sample data is generated for every requested id; nothing is verified.
    Duplicate ids are collapsed and the result keeps the order in which the
    ids were first requested, so the response is deterministic.
    """
    logging.info(f"批量获取用户信息调试: user_ids={user_ids}")

    # dict.fromkeys removes duplicates while preserving first-seen order
    # (a set would reorder the ids unpredictably between processes).
    unique_user_ids = list(dict.fromkeys(user_ids))

    # One sample record per unique id.
    users_info = [
        {
            "id": user_id,
            "name": f"用户{user_id}",
        }
        for user_id in unique_user_ids
    ]

    logging.info(f"返回用户信息: {len(users_info)}个用户")

    return {
        "code": 0,
        "data": users_info
    }


@wps_router.get("/v3/3rd/files/{file_id}", response_class=CustomJSONResponse)
async def get_file_info_v3(
        file_id: str,
):
    """Return basic file information (V3 interface).

    Follows the WPS WebOffice file-ID consistency rule: the ``id`` in the
    response must equal the requested ``file_id``.

    Raises:
        HTTPException: 404 when the file is unknown, 403 when access is
            denied, 500 when the assembled data fails validation or an
            unexpected error occurs.
    """
    logging.info(f"获取文件信息 /v3/3rd/files/{file_id}")
    try:
        # NOTE(review): WPS signature verification and user-token parsing are
        # currently disabled for this endpoint, so the lookup and permission
        # check below run without a user id (None).
        logging.info(f"获取文件信息: file_id={file_id}")

        # File lookup - replace with the real database query.
        file_info = get_file_by_id_v3(file_id, None)
        if not file_info:
            logging.warning(f"文件不存在: file_id={file_id}")
            raise HTTPException(status_code=404, detail="File not found")
        logging.info(f"获取文件信息 /v3/3rd/files/{file_info}")

        # Access check for the file.
        if not check_file_permission_v3(file_id, None):
            logging.warning(f"用户无权限访问文件:  file_id={file_id}")
            raise HTTPException(status_code=403, detail="Insufficient permissions")

        # Response payload; field names follow the WPS specification.
        response_data = {
            "id": file_info["id"],  # must equal the requested file_id
            "name": file_info["name"],
            "version": file_info["version"],
            "size": file_info["size"],
            "create_time": file_info["create_time"],
            "modify_time": file_info["modify_time"],
            "creator_id": "404",  # fixed value; file_info["creator_id"] is not used
            "modifier_id": file_info["modifier_id"]
        }

        # Validate the payload before returning it.
        validation_error = validate_file_info_response(response_data)
        if validation_error:
            logging.error(f"文件信息响应数据验证失败: {validation_error}")
            raise HTTPException(status_code=500, detail="Internal server error: invalid file data format")

        logging.info(f"成功获取文件信息: file_id={file_id}")

        return {
            "code": 0,
            "data": response_data
        }

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        # logging.exception records the traceback together with the message.
        logging.exception(f"获取文件信息异常: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")


@wps_router.get("/v3/3rd/files/{file_id}/download", response_class=CustomJSONResponse)
async def get_file_download_url(
        file_id: str
):
    """Return the download address of a file (debug version).

    The URL is looked up in a fixed table of sample files; unknown ids fall
    back to a default sample file. Replace the table with real download
    addresses in production.
    """
    logging.info(f"获取文件下载地址: file_id={file_id}")

    fallback_url = "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx"
    sample_urls = {
        "demo_file_001": "http://1.13.185.116:9000/wps-web-office-files/gv9014-bom.xlsx",
        "demo_file_002": "http://1.13.185.116:9000/wps-web-office-files/GCDS100900040001.xlsx",
        "hanjie_sop": "http://1.13.185.116:9000/wps-web-office-files/hanjie_sop.xls",
    }
    download_url = sample_urls.get(file_id, fallback_url)

    # Only "url" is returned. The response may additionally carry
    # "digest"/"digest_type" (md5 or sha1, for integrity checking) and
    # "headers" (extra request headers, e.g. for hotlink protection).
    response_data = {"url": download_url}

    logging.info(f"返回文件下载地址: {download_url}")

    return {"code": 0, "data": response_data}


def get_file_by_id_v3(file_id: str, user_id: str) -> Optional[Dict[str, Any]]:
    """Look up file information by file ID (V3) - placeholder implementation.

    A real implementation should query the database for the file record,
    return ``None`` when it does not exist, and otherwise map the record to
    the structure below (timestamps as epoch seconds).

    ``user_id`` is accepted but not used by this placeholder.

    Returns:
        Sample metadata whose ``id`` equals ``file_id`` when the id is one of
        the known sample ids, otherwise ``None``.
    """
    sample_ids = ("example_file_123", "dale_123", "demo_file_001", "demo_file_002", "hanjie_sop")
    try:
        if file_id not in sample_ids:
            # Unknown file.
            return None

        # Temporary sample data - replace with the real lookup.
        sample_record = {"id": file_id}  # must equal the requested file_id
        sample_record["name"] = "统计月报.xlsx"
        sample_record["version"] = 201
        sample_record["size"] = 18961
        sample_record["create_time"] = 1670218748  # epoch seconds
        sample_record["modify_time"] = 1759478858  # epoch seconds
        sample_record["creator_id"] = "user_404"
        sample_record["modifier_id"] = "user_404"
        return sample_record
    except Exception as exc:
        logging.error(f"查询文件信息失败: {str(exc)}")
        return None


def check_file_permission_v3(file_id: str, user_id: str) -> bool:
    """Check whether the user may access the file (V3) - placeholder implementation.

    A real implementation should query the user's permission on the file and
    return whether read access is granted. This placeholder grants access to
    everyone; both arguments are currently unused.
    """
    try:
        # Temporary behaviour - replace with the real permission lookup.
        has_access = True
    except Exception as exc:
        logging.error(f"检查文件权限失败: {str(exc)}")
        has_access = False
    return has_access


def validate_file_info_response(file_data: Dict[str, Any]) -> Optional[str]:
    """Check that a file-info payload has the shape this module expects to return.

    Checks, in order: all required fields are present; ``id`` is a string of at
    most 47 characters; ``name`` is a string of at most 240 characters without
    any of ``\\ / | " : * ? < >``; ``version`` is an integer >= 1; ``size``,
    ``create_time`` and ``modify_time`` are integers >= 0. Booleans are not
    accepted as integers.

    Returns:
        ``None`` when the payload is valid, otherwise a message describing the
        first problem found. Wrongly typed values never raise; they produce a
        message.
    """
    required_fields = ("id", "name", "version", "size", "create_time", "modify_time", "creator_id", "modifier_id")
    for field in required_fields:
        if field not in file_data:
            return f"Missing required field: {field}"

    # File ID: type first, so len() cannot raise on a non-string value.
    file_id = file_data["id"]
    if not isinstance(file_id, str):
        return "File ID must be a string"
    if len(file_id) > 47:
        return "File ID exceeds maximum length of 47 characters"

    # File name: type, length, then forbidden characters.
    file_name = file_data["name"]
    if not isinstance(file_name, str):
        return "File name must be a string"
    if len(file_name) > 240:
        return "File name exceeds maximum length of 240 characters"

    invalid_chars = ['\\', '/', '|', '"', ':', '*', '?', '<', '>']
    for char in invalid_chars:
        if char in file_name:
            return f"File name contains invalid character: {char}"

    # (field, smallest allowed value, message when the check fails)
    integer_rules = (
        ("version", 1, "Version must be a positive integer"),
        ("size", 0, "Size must be a non-negative integer"),
        ("create_time", 0, "Create time must be a non-negative integer"),
        ("modify_time", 0, "Modify time must be a non-negative integer"),
    )
    for field, minimum, message in integer_rules:
        value = file_data[field]
        # bool is a subclass of int; True/False are not meaningful here.
        if isinstance(value, bool) or not isinstance(value, int) or value < minimum:
            return message

    return None


def verify_wps_signature_direct(
        authorization: str,
        date: str,
        content_md5: str,
        content_type: str,
        x_app_id: str,
        x_weboffice_token: str
) -> Dict[str, Any]:
    """Verify a WPS request signature from explicitly passed header values.

    The Authorization value must have the form ``WPS-2:<app_id>:<signature>``,
    where the expected signature is the SHA-1 hex digest of
    ``WPS_APP_SECRET + content_md5 + content_type + date``. Both ``x_app_id``
    and the app id embedded in the Authorization value must equal
    ``WPS_APP_ID``.

    Returns:
        A dict with ``app_id`` (from the Authorization value) and ``token``
        (``x_weboffice_token``).

    Raises:
        HTTPException: 401 for every rejection, including unexpected errors
            during verification.
    """
    try:
        # The declared app id must be ours.
        if x_app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        # Authorization must be "WPS-2:<app_id>:<signature>".
        if not authorization.startswith("WPS-2:"):
            raise HTTPException(status_code=401, detail="Invalid signature format")

        parts = authorization.split(":")
        if len(parts) != 3:
            raise HTTPException(status_code=401, detail="Invalid authorization header")

        _, app_id, signature = parts

        # The signature does not cover the app id embedded in the header, and
        # that value is returned to the caller, so check it explicitly.
        if app_id != WPS_APP_ID:
            raise HTTPException(status_code=401, detail="Invalid AppId")

        # Expected signature for this request.
        string_to_sign = WPS_APP_SECRET + content_md5 + content_type + date
        expected_signature = hashlib.sha1(string_to_sign.encode()).hexdigest()

        # Constant-time comparison on bytes: compare_digest raises TypeError
        # for str arguments that contain non-ASCII characters.
        if not hmac.compare_digest(signature.encode("utf-8"), expected_signature.encode("utf-8")):
            raise HTTPException(status_code=401, detail="Signature verification failed")

        return {
            "app_id": app_id,
            "token": x_weboffice_token
        }

    except HTTPException:
        # Deliberate rejections pass through unchanged.
        raise
    except Exception as e:
        logging.error(f"WPS签名验证异常: {str(e)}")
        raise HTTPException(status_code=401, detail="Signature verification error") from e