# -*- coding: utf-8 -*-
"""Add or replace a date suffix on image objects in MinIO and on local disk.

For every image found under the configured season directories in MinIO the
script proposes a new name carrying the requested date suffix (for example
``_1114``), shows which markdown files (in MinIO and locally) and which local
image files would be touched, asks for confirmation, and applies the change
immediately once confirmed.
"""
import io
import os
import re
import sys

from minio import Minio
from minio.commonconfig import CopySource
from minio.error import S3Error

# A trailing "_" followed by exactly four digits is treated as an existing date suffix.
_DATE_SUFFIX_RE = re.compile(r'_\d{4}$')
# Markdown image syntax, with or without alt text: ![alt](url)
_IMAGE_REF_RE = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
_MARKDOWN_SUFFIXES = ('_md.txt', '_md.md')


def _read_minio_text(minio_client, bucket_name, object_name):
    """Download an object and decode it as UTF-8, always releasing the connection."""
    response = minio_client.get_object(bucket_name, object_name)
    try:
        return response.read().decode('utf-8')
    finally:
        response.close()
        response.release_conn()


def _replace_filename(text, old_filename, new_filename):
    """Replace whole-filename occurrences of ``old_filename`` in ``text``.

    A plain ``str.replace`` would also rewrite longer names that merely end
    with the old name (renaming ``1.jpg`` would corrupt ``11.jpg``), so the
    match must not be preceded by a word character, ``.`` or ``-``, nor be
    followed by a word character.
    """
    pattern = r'(?<![\w.\-])' + re.escape(old_filename) + r'(?!\w)'
    # A function replacement keeps backslashes in new_filename literal.
    return re.sub(pattern, lambda _match: new_filename, text)


def _index_references(markdown_index, md_file, content):
    """Register ``md_file`` once under every image filename it references."""
    # dict.fromkeys de-duplicates while keeping order, so a file that mentions
    # the same image several times is only listed (and later processed) once.
    for image_ref in dict.fromkeys(extract_image_references(content)):
        markdown_index.setdefault(image_ref, []).append(md_file)


def _target_filename(filename, date_suffix):
    """Return the filename carrying ``date_suffix``, or None if nothing changes."""
    name_part, ext_part = os.path.splitext(filename)
    if _DATE_SUFFIX_RE.search(name_part):
        # Existing date suffix: swap it for the requested one.
        new_name_part = _DATE_SUFFIX_RE.sub(lambda _match: date_suffix, name_part)
    elif not name_part.endswith(date_suffix):
        # No date suffix yet: append it.
        new_name_part = name_part + date_suffix
    else:
        return None
    new_filename = f"{new_name_part}{ext_part}"
    # A name that already carries the target suffix must not be "renamed" to
    # itself: the copy-then-delete sequence would delete the object.
    return None if new_filename == filename else new_filename


def _prompt_yes_no(prompt):
    """Ask ``prompt`` until the user answers y/yes or n/no; return the answer as a bool."""
    while True:
        response = input(prompt).strip().lower()
        if response in ('y', 'yes'):
            return True
        if response in ('n', 'no'):
            return False
        print("请输入 'y' 或 'n'")


def process_minio_and_local_objects(minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix):
    """
    Walk the season directories in MinIO, add the date suffix to image
    objects and update the markdown references in MinIO and on local disk.

    :param minio_client: connected MinIO client
    :param bucket_name: bucket that holds the images and markdown files
    :param base_path: object-key prefix containing the season directories
    :param season_dirs: names of the season directories to process
    :param local_base_path: root of the local mirror to keep in sync
    :param date_suffix: date suffix, for example '_1114'
    :return: {old object key: new object key} for every rename the user confirmed
    """
    # All confirmed renames {old object key: new object key}
    rename_mappings = {}
    # Summary records of the markdown / local files touched
    minio_markdown_updates = {}
    local_markdown_updates = {}

    # Index markdown contents and local images up front for fast lookups
    minio_markdown_index = build_minio_markdown_index(minio_client, bucket_name, base_path)
    local_markdown_index = build_local_markdown_index(local_base_path)
    local_image_index = build_local_image_index(local_base_path)

    for season_dir in season_dirs:
        print(f"\n处理季节目录: {season_dir} {base_path}")
        full_path = f"{base_path}/{season_dir}/"
        season_rename_mappings = process_directory_recursive(
            minio_client, bucket_name, full_path, season_dir, base_path,
            minio_markdown_index, local_markdown_index, local_image_index,
            minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
        )
        rename_mappings.update(season_rename_mappings)

    if show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
        execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates)
        return rename_mappings

    if rename_mappings:
        # Each unit was applied the moment it was confirmed, so declining here
        # cannot undo anything; report what actually happened.
        print("\n注意: 已逐项确认的更改在确认时已经执行,无法在此处撤销。")
        return rename_mappings

    print("\n操作已取消,未执行任何更改。")
    return {}


def build_minio_markdown_index(minio_client, bucket_name, base_path):
    """
    Build {image filename: [markdown object keys]} for the markdown files in MinIO.
    """
    print("构建MinIO中markdown文件索引...")
    markdown_index = {}

    for md_file in find_minio_markdown_files(minio_client, bucket_name, base_path):
        try:
            content = _read_minio_text(minio_client, bucket_name, md_file)
        except (S3Error, UnicodeDecodeError) as exc:
            # One unreadable or badly encoded file must not abort the whole index.
            print(f"处理MinIO中markdown文件 {md_file} 时发生错误: {exc}")
            continue
        _index_references(markdown_index, md_file, content)

    print(f"MinIO索引构建完成,共找到 {len(markdown_index)} 个图片引用")
    return markdown_index


def build_local_markdown_index(local_base_path):
    """
    Build {image filename: [markdown file paths]} for the local markdown files.
    """
    print("构建本地markdown文件索引...")
    markdown_index = {}

    for md_file in find_local_markdown_files(local_base_path):
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as exc:
            print(f"处理本地markdown文件 {md_file} 时发生错误: {exc}")
            continue
        _index_references(markdown_index, md_file, content)

    print(f"本地索引构建完成,共找到 {len(markdown_index)} 个图片引用")
    return markdown_index


def build_local_image_index(local_base_path):
    """
    Build {image filename: [full local paths]} for every image under the local root.
    """
    print("构建本地图片文件索引...")
    image_index = {}

    for root, _dirs, files in os.walk(local_base_path):
        for file_name in files:
            if is_image_file(file_name):
                image_index.setdefault(file_name, []).append(os.path.join(root, file_name))

    print(f"本地图片索引构建完成,共找到 {len(image_index)} 个图片文件")
    return image_index


def extract_image_references(content):
    """
    Return the filenames of all images referenced in markdown ``content``.

    Both ``![](url)`` and ``![alt text](url)`` are recognised; only the last
    path component of each URL is kept.
    """
    image_refs = []
    for url in _IMAGE_REF_RE.findall(content):
        filename = os.path.basename(url)
        if filename:
            image_refs.append(filename)
    return image_refs


def process_directory_recursive(
        minio_client, bucket_name, current_path, season_dir, base_path,
        minio_markdown_index, local_markdown_index, local_image_index,
        minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
):
    """
    Process one MinIO "directory" and, recursively, all of its sub-directories.

    For every image that needs the suffix, preview the affected files, ask
    for confirmation and apply the change immediately when confirmed.

    :param current_path: object-key prefix (ending in '/') being listed
    :param date_suffix: date suffix, for example '_1114'
    :return: {old object key: new object key} for the confirmed renames
    """
    rename_mappings = {}
    season_prefix = f"{base_path}/{season_dir}/"

    try:
        # Take a snapshot of the listing: objects are renamed while we go, and
        # a lazily paginated listing could otherwise pick up the new names.
        objects = list(minio_client.list_objects(bucket_name, prefix=current_path, recursive=False))
    except S3Error as exc:
        print(f"处理目录 {current_path} 时发生错误: {exc}")
        return rename_mappings

    for obj in objects:
        object_name = obj.object_name

        if object_name == current_path:
            # A folder-marker object named exactly like the prefix would
            # otherwise make us recurse into the same path forever.
            continue

        if object_name.endswith('/'):
            rename_mappings.update(process_directory_recursive(
                minio_client, bucket_name, object_name, season_dir, base_path,
                minio_markdown_index, local_markdown_index, local_image_index,
                minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
            ))
            continue

        if not is_image_file(object_name):
            continue

        filename = os.path.basename(object_name)
        rel_path = object_name.replace(season_prefix, "")
        new_filename = _target_filename(filename, date_suffix)
        if new_filename is None:
            print(f" 已是目标后缀{date_suffix}: {rel_path}")
            continue

        dir_path = os.path.dirname(object_name)
        new_object_name = f"{dir_path}/{new_filename}" if dir_path else new_filename
        new_rel_path = new_object_name.replace(season_prefix, "")

        # Everything that refers to this image by filename
        minio_affected_markdowns = minio_markdown_index.get(filename, [])
        local_affected_markdowns = local_markdown_index.get(filename, [])
        local_affected_images = local_image_index.get(filename, [])

        print(f"\n{'=' * 80}")
        print(f"发现需要重命名: {rel_path} -> {new_rel_path}")
        print(f"{'=' * 80}")

        if minio_affected_markdowns:
            print(f"MinIO中此图片被以下 {len(minio_affected_markdowns)} 个markdown文件引用:")
            for md_file in minio_affected_markdowns:
                print(f" - {os.path.basename(md_file)}:")
                show_markdown_changes(minio_client, bucket_name, md_file, filename, new_filename, "MinIO")

        if local_affected_markdowns:
            print(f"本地此图片被以下 {len(local_affected_markdowns)} 个markdown文件引用:")
            for md_file in local_affected_markdowns:
                print(f" - {os.path.basename(md_file)}:")
                show_markdown_changes_local(md_file, filename, new_filename, "本地")

        if local_affected_images:
            print(f"本地有以下 {len(local_affected_images)} 个同名图片文件需要重命名:")
            for img_file in local_affected_images:
                new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                rel_img_path = os.path.relpath(img_file, local_base_path)
                rel_new_img_path = os.path.relpath(new_img_file, local_base_path)
                print(f" - {rel_img_path} -> {rel_new_img_path}")

        if not confirm_single_change("MinIO和本地文件修改"):
            print(f" ✗ 已取消修改: {rel_path}")
            continue

        rename_mappings[object_name] = new_object_name

        # Summary records; only the first change per file is kept.
        for md_file in minio_affected_markdowns:
            minio_markdown_updates.setdefault(md_file, {
                'old_filename': filename,
                'new_filename': new_filename
            })
        for md_file in local_affected_markdowns:
            local_markdown_updates.setdefault(md_file, {
                'type': 'markdown',
                'old_filename': filename,
                'new_filename': new_filename
            })
        for img_file in local_affected_images:
            local_markdown_updates.setdefault(img_file, {
                'type': 'image',
                'old_filename': filename,
                'new_filename': new_filename,
                'new_path': os.path.join(os.path.dirname(img_file), new_filename)
            })

        print(f" ✓ 已确认修改: {rel_path} -> {new_rel_path}")

        # Apply this unit right away
        execute_single_unit_changes(
            minio_client, bucket_name, object_name, new_object_name,
            minio_affected_markdowns, local_affected_markdowns,
            local_affected_images, filename, new_filename
        )

    return rename_mappings


def _print_line_changes(content, old_filename, new_filename):
    """Print every line of ``content`` that the filename replacement would change."""
    for line_no, line in enumerate(content.split('\n'), 1):
        new_line = _replace_filename(line, old_filename, new_filename)
        if new_line != line:
            print(f" 行 {line_no}:")
            print(f" 原内容: {line}")
            print(f" 新内容: {new_line}")


def show_markdown_changes(minio_client, bucket_name, md_file, old_filename, new_filename, source):
    """
    Preview the lines of a MinIO markdown object that the rename would change.
    """
    try:
        content = _read_minio_text(minio_client, bucket_name, md_file)
    except (S3Error, UnicodeDecodeError) as exc:
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
        return
    _print_line_changes(content, old_filename, new_filename)


def show_markdown_changes_local(md_file, old_filename, new_filename, source):
    """
    Preview the lines of a local markdown file that the rename would change.
    """
    try:
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as exc:
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
        return
    _print_line_changes(content, old_filename, new_filename)


def _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
    """Copy the object to its new key and delete the old key; return False if the copy failed."""
    old_base = os.path.basename(old_object_name)
    new_base = os.path.basename(new_object_name)

    try:
        # Server-side copy; the source is described by a CopySource object.
        minio_client.copy_object(bucket_name, new_object_name, CopySource(bucket_name, old_object_name))
        print(f" ✓ MinIO图片重命名完成: {old_base} -> {new_base}")
    except Exception:
        # Fallback: download the object and upload it under the new key.
        try:
            print(" 尝试备用方法重命名MinIO图片...")
            response = minio_client.get_object(bucket_name, old_object_name)
            try:
                object_data = response.read()
                # Keep the original content type so the image is still served correctly.
                content_type = response.headers.get('Content-Type', 'application/octet-stream')
            finally:
                response.close()
                response.release_conn()
            minio_client.put_object(
                bucket_name, new_object_name, io.BytesIO(object_data), len(object_data),
                content_type=content_type
            )
            print(f" ✓ MinIO图片重命名完成(备用方法): {old_base} -> {new_base}")
        except Exception as alt_exc:
            print(f" ✗ MinIO图片重命名失败 {old_object_name}: {alt_exc}")
            return False

    try:
        minio_client.remove_object(bucket_name, old_object_name)
    except S3Error as rm_exc:
        print(f" ✗ 删除MinIO原对象失败 {old_object_name}: {rm_exc}")
    return True


def _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename):
    """Rewrite the image reference inside one MinIO markdown object and verify the upload."""
    print(f" → 正在处理MinIO markdown: {os.path.basename(md_file)}")
    content = _read_minio_text(minio_client, bucket_name, md_file)
    print(f" 原内容包含旧文件名: {old_filename in content}")

    new_content = _replace_filename(content, old_filename, new_filename)
    if new_content == content:
        print(" ⚠ 警告: 内容未发生变化,可能未找到匹配项")
        return
    print(f" 内容已替换: {old_filename} → {new_filename}")

    # put_object overwrites the existing key. The object is deliberately NOT
    # deleted first: a failed upload after a delete would lose the file.
    content_bytes = new_content.encode('utf-8')
    minio_client.put_object(
        bucket_name, md_file, io.BytesIO(content_bytes),
        length=len(content_bytes), content_type='text/plain; charset=utf-8'
    )

    verify_content = _read_minio_text(minio_client, bucket_name, md_file)
    if new_filename in verify_content:
        print(f" ✓ MinIO markdown更新完成并验证成功: {os.path.basename(md_file)}")
    else:
        print(" ✗ MinIO markdown验证失败: 更新后仍未包含新文件名")


def _update_local_markdown(md_file, old_filename, new_filename):
    """Rewrite the image reference inside one local markdown file."""
    # newline='' disables newline translation so CRLF files keep their endings.
    with open(md_file, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    new_content = _replace_filename(content, old_filename, new_filename)
    if new_content == content:
        print(" ⚠ 警告: 内容未发生变化,可能未找到匹配项")
        return

    with open(md_file, 'w', encoding='utf-8', newline='') as f:
        f.write(new_content)
    print(f" ✓ 本地markdown更新完成: {os.path.basename(md_file)}")


def execute_single_unit_changes(
        minio_client, bucket_name, old_object_name, new_object_name,
        minio_affected_markdowns, local_affected_markdowns,
        local_affected_images, old_filename, new_filename
):
    """
    Apply one confirmed rename: the MinIO image, the MinIO markdown files,
    the local images with the same name and the local markdown files.

    Failures are reported per item and never raised; if the MinIO image
    itself cannot be renamed, the remaining steps are skipped.
    """
    try:
        # 1. Rename the image object in MinIO
        if not _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
            return

        # 2. Update the markdown objects in MinIO
        for md_file in minio_affected_markdowns:
            try:
                _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename)
            except S3Error as exc:
                print(f" ✗ MinIO markdown更新失败 {md_file}: {exc}")
            except Exception as exc:
                print(f" ✗ MinIO markdown更新失败(未知错误) {md_file}: {exc}")

        # 3. Rename the local image files
        for img_file in local_affected_images:
            try:
                new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                if os.path.exists(new_img_file):
                    # os.rename silently overwrites on POSIX; refuse instead.
                    raise FileExistsError(new_img_file)
                os.rename(img_file, new_img_file)
                print(f" ✓ 本地图片重命名完成: {os.path.basename(img_file)} -> {os.path.basename(new_filename)}")
            except Exception as exc:
                print(f" ✗ 本地图片重命名失败 {img_file}: {exc}")

        # 4. Update the local markdown files
        for md_file in local_affected_markdowns:
            try:
                _update_local_markdown(md_file, old_filename, new_filename)
            except Exception as exc:
                print(f" ✗ 本地markdown更新失败 {md_file}: {exc}")

    except Exception as exc:
        print(f" ✗ 执行单元修改时发生错误: {exc}")


def find_minio_markdown_files(minio_client, bucket_name, base_path):
    """
    Return the keys of all markdown objects (``*_md.txt`` / ``*_md.md``) under ``base_path``.
    """
    markdown_files = []

    try:
        for obj in minio_client.list_objects(bucket_name, prefix=base_path, recursive=True):
            if obj.object_name.endswith(_MARKDOWN_SUFFIXES):
                markdown_files.append(obj.object_name)
    except S3Error as exc:
        print(f"查找MinIO中markdown文件时发生错误: {exc}")

    print(f"找到 {len(markdown_files)} 个MinIO中的markdown文件")
    return markdown_files


def find_local_markdown_files(local_base_path):
    """
    Return the paths of all local markdown files (``*_md.txt`` / ``*_md.md``).
    """
    markdown_files = []

    try:
        for root, _dirs, files in os.walk(local_base_path):
            for file_name in files:
                if file_name.endswith(_MARKDOWN_SUFFIXES):
                    markdown_files.append(os.path.join(root, file_name))
    except Exception as exc:
        print(f"查找本地markdown文件时发生错误: {exc}")

    print(f"找到 {len(markdown_files)} 个本地markdown文件")
    return markdown_files


def confirm_single_change(change_type):
    """
    Ask the user to confirm a single change; return True for yes, False for no.
    """
    return _prompt_yes_no(f"是否确认此{change_type}? (y/n): ")


def show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Print a summary of all recorded changes and ask the user for confirmation.

    :return: True if the user answered yes, False otherwise
    """
    print("\n" + "=" * 80)
    print("更改摘要:")
    print("=" * 80)

    # Image renames in MinIO
    print(f"\n1. MinIO图片文件重命名 ({len(rename_mappings)} 个文件):")
    for i, (old_path, new_path) in enumerate(rename_mappings.items(), 1):
        print(f" {i}. {os.path.basename(old_path)} -> {os.path.basename(new_path)}")

    # Markdown updates in MinIO
    print(f"\n2. MinIO Markdown文件更新 ({len(minio_markdown_updates)} 个文件):")
    for i, (md_path, update_info) in enumerate(minio_markdown_updates.items(), 1):
        print(
            f" {i}. {os.path.basename(md_path)}: {update_info['old_filename']} -> {update_info['new_filename']}")

    # Local updates (markdown files and renamed images share this dict)
    print(f"\n3. 本地Markdown文件更新 ({len(local_markdown_updates)} 个文件):")
    for i, (md_path, update_info) in enumerate(local_markdown_updates.items(), 1):
        entry_type = update_info.get('type')
        if entry_type == 'markdown':
            print(
                f" {i}. {os.path.basename(md_path)}: {update_info['old_filename']} -> {update_info['new_filename']}")
        elif entry_type == 'image':
            print(f" {i}. {os.path.basename(md_path)} -> {update_info['new_filename']}: 图片重命名")
        elif md_path.endswith(('.txt', '.md')):
            # Entries without a 'type' key: decide by file extension
            print(
                f" {i}. {os.path.basename(md_path)}: {update_info['old_filename']} -> {update_info['new_filename']}")
        else:
            print(f" {i}. {os.path.basename(md_path)}: 图片重命名")

    print("\n" + "=" * 80)
    print(
        f"总计: {len(rename_mappings)} 个MinIO图片文件将被重命名, {len(minio_markdown_updates)} 个MinIO markdown文件将被更新, {len(local_markdown_updates)} 个本地文件将被更新")
    print("=" * 80)

    return _prompt_yes_no("\n是否执行这些更改? (y/n): ")


def execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Report completion. The changes themselves were already applied unit by
    unit in execute_single_unit_changes, so the arguments are not used here.
    """
    print("\n所有更改已完成!")


def is_image_file(filename):
    """
    Return True if ``filename`` has a known image extension (case-insensitive).
    """
    return filename.lower().endswith(_IMAGE_EXTENSIONS)


def main():
    """
    Command-line entry point: ``python script.py [date_suffix]``.
    """
    # Date suffix from the command line, normalised to start with '_'
    if len(sys.argv) > 1:
        date_suffix = sys.argv[1]
        if not date_suffix.startswith('_'):
            date_suffix = '_' + date_suffix
    else:
        date_suffix = '_1114'  # default

    print(f"使用日期后缀: {date_suffix}")
    print("提示: 可以通过命令行参数修改后缀,例如: python script.py _1203")
    print("=" * 80)

    # SECURITY: endpoint and credentials are hard-coded defaults. Prefer the
    # MINIO_ENDPOINT / MINIO_ACCESS_KEY / MINIO_SECRET_KEY environment
    # variables so secrets do not have to live in the source file.
    minio_client = Minio(
        os.environ.get("MINIO_ENDPOINT", "1.13.185.116:9000"),
        access_key=os.environ.get("MINIO_ACCESS_KEY", "rag_flow"),
        secret_key=os.environ.get("MINIO_SECRET_KEY", "infini_rag_flow"),
        secure=False
    )

    # Configuration
    bucket_name = "exhibit-photo"
    base_path = "bj_yuanlin/category"  # base prefix in MinIO
    season_dirs = ["chun_pic_md", "xia_pic_md", "qiu_pic_md", "dong_pic_md"]
    local_base_path = "."  # local root

    print("\n开始扫描MinIO和本地文件系统...")
    print(f"MinIO基础路径: {base_path}")
    print(f"季节目录: {', '.join(season_dirs)}")
    print(f"本地基础路径: {local_base_path}")
    print(f"日期后缀: {date_suffix}")

    try:
        rename_mappings = process_minio_and_local_objects(
            minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix
        )

        if rename_mappings:
            print(f"\n处理完成!共重命名 {len(rename_mappings)} 个MinIO图片文件")
        else:
            print("\n未执行任何更改")

    except Exception as e:
        # Top-level boundary: report and exit instead of dumping a traceback.
        print(f"处理过程中出现错误: {e}")


if __name__ == "__main__":
    main()