Files
museum_admin/add_1114_to_minio_file.py

673 lines
28 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
import os
import re
import io
from minio import Minio
from minio.error import S3Error
def process_minio_and_local_objects(minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix):
    """
    Add a date suffix to image objects in MinIO and on the local file system,
    and rewrite the markdown references that point at them.

    Each rename is confirmed and applied one unit at a time inside
    ``process_directory_recursive``; the summary shown at the end is therefore
    a report of what has already happened, not a gate.

    :param minio_client: MinIO client used for listing and modifying objects
    :param bucket_name: bucket that holds the images and markdown files
    :param base_path: object-key prefix under which the season directories live
    :param season_dirs: names of the season sub-directories to process
    :param local_base_path: root of the local mirror that is updated in parallel
    :param date_suffix: date suffix to apply, e.g. '_1114'
    :return: mapping {old object key: new object key} of confirmed renames
    """
    # All confirmed renames: {old object key: new object key}
    rename_mappings = {}
    # Per-file update records, filled in by process_directory_recursive
    minio_markdown_updates = {}
    local_markdown_updates = {}
    # Index markdown contents first so references can be looked up by image name
    minio_markdown_index = build_minio_markdown_index(minio_client, bucket_name, base_path)
    local_markdown_index = build_local_markdown_index(local_base_path)
    # Index local image files by base name
    local_image_index = build_local_image_index(local_base_path)
    for season_dir in season_dirs:
        print(f"\n处理季节目录: {season_dir} {base_path}")
        full_path = f"{base_path}/{season_dir}/"
        # Walk the season directory, including every sub-directory
        season_rename_mappings = process_directory_recursive(
            minio_client, bucket_name, full_path, season_dir, base_path,
            minio_markdown_index, local_markdown_index, local_image_index,
            minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
        )
        rename_mappings.update(season_rename_mappings)
    # Show the summary of everything that was confirmed
    if show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
        execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates)
    else:
        # The confirmed units were executed immediately, so claiming that
        # nothing changed (and returning an empty mapping) would be false.
        print("\n注意: 已逐项确认的更改在确认时已经执行,无法在此撤销。")
    return rename_mappings
def build_minio_markdown_index(minio_client, bucket_name, base_path):
    """
    Build an index of the markdown files stored in MinIO, keyed by the image
    file names they reference.

    :param minio_client: MinIO client used to list and download objects
    :param bucket_name: bucket to scan
    :param base_path: object-key prefix to scan
    :return: dict {image file name: [markdown object keys referencing it]};
             each markdown key appears at most once per image
    """
    print("构建MinIO中markdown文件索引...")
    markdown_index = {}
    for md_file in find_minio_markdown_files(minio_client, bucket_name, base_path):
        try:
            response = minio_client.get_object(bucket_name, md_file)
            try:
                content = response.read().decode('utf-8')
            finally:
                # Always hand the connection back, even if read/decode fails
                response.close()
                response.release_conn()
        except (S3Error, UnicodeDecodeError) as exc:
            # One unreadable file must not abort the whole index build
            print(f"处理MinIO中markdown文件 {md_file} 时发生错误: {exc}")
            continue
        # dict.fromkeys de-duplicates while keeping first-seen order, so a file
        # that embeds the same image twice is only listed (and updated) once
        for image_ref in dict.fromkeys(extract_image_references(content)):
            markdown_index.setdefault(image_ref, []).append(md_file)
    print(f"MinIO索引构建完成共找到 {len(markdown_index)} 个图片引用")
    return markdown_index
def build_local_markdown_index(local_base_path):
    """
    Build an index of the local markdown files, keyed by the image file names
    they reference.

    :param local_base_path: directory tree to scan
    :return: dict {image file name: [markdown file paths referencing it]};
             each path appears at most once per image
    """
    print("构建本地markdown文件索引...")
    markdown_index = {}
    for md_file in find_local_markdown_files(local_base_path):
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as exc:
            # Unreadable or non-UTF-8 file: report it and keep indexing
            print(f"处理本地markdown文件 {md_file} 时发生错误: {exc}")
            continue
        # De-duplicate so a file that embeds the same image several times is
        # recorded only once for that image
        for image_ref in dict.fromkeys(extract_image_references(content)):
            markdown_index.setdefault(image_ref, []).append(md_file)
    print(f"本地索引构建完成,共找到 {len(markdown_index)} 个图片引用")
    return markdown_index
def build_local_image_index(local_base_path):
    """
    Walk ``local_base_path`` and group every image file by its base name.

    :param local_base_path: directory tree to scan
    :return: dict {file name: [full paths of files with that name]}
    """
    print("构建本地图片文件索引...")
    image_index = {}
    for folder, _subdirs, entries in os.walk(local_base_path):
        for entry in entries:
            if not is_image_file(entry):
                continue
            key = os.path.basename(entry)
            full_path = os.path.join(folder, entry)
            image_index.setdefault(key, []).append(full_path)
    # The count below is the number of distinct file names, not of files
    print(f"本地图片索引构建完成,共找到 {len(image_index)} 个图片文件")
    return image_index
def extract_image_references(content):
    """
    Extract the file names of all images embedded in markdown text.

    Both ``![](url)`` and ``![alt text](url)`` are recognised; only the last
    path component of each URL is returned.

    :param content: markdown text
    :return: list of image file names in order of appearance (may repeat)
    """
    # The alt text between the square brackets is optional; the capture group
    # is everything between the parentheses.
    pattern = r'!\[[^\]]*\]\(([^)]+)\)'
    image_refs = []
    for target in re.findall(pattern, content):
        # Keep only the file name, dropping any URL/directory prefix
        filename = os.path.basename(target)
        if filename:
            image_refs.append(filename)
    return image_refs
def _renamed_filename(filename, date_suffix):
    """Return ``filename`` carrying ``date_suffix``, or None if it already ends with it."""
    name_part, ext_part = os.path.splitext(filename)
    # Check the target suffix FIRST: a name such as 'x_1114' also matches the
    # generic 4-digit pattern below and would otherwise be "renamed" to itself,
    # after which the copy-then-delete rename removes the only copy.
    if name_part.endswith(date_suffix):
        return None
    date_pattern = r'_\d{4}$'
    if re.search(date_pattern, name_part):
        # Replace an existing '_dddd' suffix; a callable replacement keeps
        # date_suffix literal (no backslash / group-reference expansion).
        new_name_part = re.sub(date_pattern, lambda _match: date_suffix, name_part)
    else:
        # No date suffix yet: append the new one
        new_name_part = f"{name_part}{date_suffix}"
    return f"{new_name_part}{ext_part}"


def _print_rename_preview(
        minio_client, bucket_name, filename, new_filename, rel_path, new_rel_path,
        minio_affected_markdowns, local_affected_markdowns, local_affected_images,
        local_base_path
):
    """Print what renaming one image would change in MinIO and locally."""
    print(f"\n{'=' * 80}")
    print(f"发现需要重命名: {rel_path} -> {new_rel_path}")
    print(f"{'=' * 80}")
    # Markdown files in MinIO that reference the image, with the changed lines
    if minio_affected_markdowns:
        print(f"MinIO中此图片被以下 {len(minio_affected_markdowns)} 个markdown文件引用:")
        for md_file in minio_affected_markdowns:
            print(f" - {os.path.basename(md_file)}:")
            show_markdown_changes(minio_client, bucket_name, md_file, filename, new_filename,
                                  "MinIO")
    # Local markdown files that reference the image, with the changed lines
    if local_affected_markdowns:
        print(f"本地此图片被以下 {len(local_affected_markdowns)} 个markdown文件引用:")
        for md_file in local_affected_markdowns:
            print(f" - {os.path.basename(md_file)}:")
            show_markdown_changes_local(md_file, filename, new_filename, "本地")
    # Local image files that share the same base name
    if local_affected_images:
        print(f"本地有以下 {len(local_affected_images)} 个同名图片文件需要重命名:")
        for img_file in local_affected_images:
            rel_img_path = os.path.relpath(img_file, local_base_path)
            new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
            rel_new_img_path = os.path.relpath(new_img_file, local_base_path)
            print(f" - {rel_img_path} -> {rel_new_img_path}")


def process_directory_recursive(
        minio_client, bucket_name, current_path, season_dir, base_path,
        minio_markdown_index, local_markdown_index, local_image_index,
        minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
):
    """
    Process one MinIO "directory" (key prefix) and all of its sub-directories.

    For every image whose name does not yet end with ``date_suffix`` the
    planned change is shown, the user is asked to confirm it, and a confirmed
    change is applied immediately via ``execute_single_unit_changes``.

    :param minio_client: MinIO client
    :param bucket_name: bucket being processed
    :param current_path: key prefix (ending in '/') to list
    :param season_dir: season directory name, used to shorten displayed paths
    :param base_path: base key prefix, used to shorten displayed paths
    :param minio_markdown_index: {image name: [markdown object keys]}
    :param local_markdown_index: {image name: [markdown file paths]}
    :param local_image_index: {image name: [local image paths]}
    :param minio_markdown_updates: dict updated in place with MinIO markdown records
    :param local_markdown_updates: dict updated in place with local markdown/image records
    :param local_base_path: root of the local mirror, used for relative display paths
    :param date_suffix: date suffix to apply, e.g. '_1114'
    :return: dict {old object key: new object key} for the confirmed renames
    """
    rename_mappings = {}
    season_prefix = f"{base_path}/{season_dir}/"
    try:
        # List only the direct children of the current prefix
        objects = minio_client.list_objects(bucket_name, prefix=current_path, recursive=False)
        for obj in objects:
            object_name = obj.object_name
            if object_name.endswith('/'):
                # A directory-marker object for the prefix being listed would
                # otherwise make this function recurse into itself forever.
                if object_name == current_path:
                    continue
                rename_mappings.update(process_directory_recursive(
                    minio_client, bucket_name, object_name, season_dir, base_path,
                    minio_markdown_index, local_markdown_index, local_image_index,
                    minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
                ))
                continue
            if not is_image_file(object_name):
                continue

            filename = os.path.basename(object_name)
            rel_path = object_name.replace(season_prefix, "")
            new_filename = _renamed_filename(filename, date_suffix)
            if new_filename is None:
                # Already carries the target suffix: nothing to do
                print(f" 已是目标后缀{date_suffix}: {rel_path}")
                continue

            # Build the new full object key next to the old one
            dir_path = os.path.dirname(object_name)
            new_object_name = f"{dir_path}/{new_filename}" if dir_path else new_filename
            new_rel_path = new_object_name.replace(season_prefix, "")
            # Everything that refers to, or mirrors, this image
            minio_affected_markdowns = minio_markdown_index.get(filename, [])
            local_affected_markdowns = local_markdown_index.get(filename, [])
            local_affected_images = local_image_index.get(filename, [])

            _print_rename_preview(
                minio_client, bucket_name, filename, new_filename, rel_path, new_rel_path,
                minio_affected_markdowns, local_affected_markdowns, local_affected_images,
                local_base_path
            )

            if not confirm_single_change("MinIO和本地文件修改"):
                print(f" ✗ 已取消修改: {rel_path}")
                continue

            rename_mappings[object_name] = new_object_name
            # Only the first rename touching a given file is recorded for the summary
            for md_file in minio_affected_markdowns:
                minio_markdown_updates.setdefault(md_file, {
                    'old_filename': filename,
                    'new_filename': new_filename
                })
            for md_file in local_affected_markdowns:
                local_markdown_updates.setdefault(md_file, {
                    'type': 'markdown',
                    'old_filename': filename,
                    'new_filename': new_filename
                })
            # Local image renames share the same dict, tagged with type 'image'
            for img_file in local_affected_images:
                local_markdown_updates.setdefault(img_file, {
                    'type': 'image',
                    'old_filename': filename,
                    'new_filename': new_filename,
                    'new_path': os.path.join(os.path.dirname(img_file), new_filename)
                })
            print(f" ✓ 已确认修改: {rel_path} -> {new_rel_path}")
            # Apply this unit right away
            execute_single_unit_changes(
                minio_client, bucket_name, object_name, new_object_name,
                minio_affected_markdowns, local_affected_markdowns,
                local_affected_images, filename, new_filename
            )
    except S3Error as exc:
        print(f"处理目录 {current_path} 时发生错误: {exc}")
    return rename_mappings
def show_markdown_changes(minio_client, bucket_name, md_file, old_filename, new_filename, source):
    """
    Print, for a markdown object in MinIO, every line that contains
    ``old_filename`` together with the line as it would look after replacement.

    :param minio_client: MinIO client used to download the object
    :param bucket_name: bucket holding the markdown object
    :param md_file: object key of the markdown file
    :param old_filename: image file name to look for
    :param new_filename: image file name that would replace it
    :param source: label inserted into the error message (e.g. "MinIO")
    """
    try:
        response = minio_client.get_object(bucket_name, md_file)
        try:
            content = response.read().decode('utf-8')
        finally:
            # Release the connection even when reading or decoding fails
            response.close()
            response.release_conn()
    except (S3Error, UnicodeDecodeError) as exc:
        # This is only a preview; an unreadable file must not stop the run
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
        return
    for line_no, line in enumerate(content.split('\n'), start=1):
        if old_filename in line:
            print(f"{line_no}:")
            print(f" 原内容: {line}")
            print(f" 新内容: {line.replace(old_filename, new_filename)}")
def show_markdown_changes_local(md_file, old_filename, new_filename, source):
    """
    Print every line of a local markdown file that mentions ``old_filename``,
    followed by the same line with ``new_filename`` substituted.

    :param md_file: path of the markdown file
    :param old_filename: image file name to look for
    :param new_filename: image file name that would replace it
    :param source: label inserted into the error message (e.g. "本地")
    """
    try:
        with open(md_file, 'r', encoding='utf-8') as handle:
            text = handle.read()
        for number, original in enumerate(text.split('\n'), start=1):
            if old_filename not in original:
                continue
            updated = original.replace(old_filename, new_filename)
            print(f"{number}:")
            print(f" 原内容: {original}")
            print(f" 新内容: {updated}")
    except Exception as exc:
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
def _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
    """Copy the object to its new key and delete the old key; return False if the copy failed."""
    old_base = os.path.basename(old_object_name)
    new_base = os.path.basename(new_object_name)
    try:
        # Method 1: server-side copy. The SDK expects a CopySource object, not
        # a dict. The import sits inside the try so an SDK without CopySource
        # simply falls through to the download/upload fallback below.
        from minio.commonconfig import CopySource
        minio_client.copy_object(
            bucket_name, new_object_name, CopySource(bucket_name, old_object_name)
        )
        print(f" ✓ MinIO图片重命名完成: {old_base} -> {new_base}")
    except Exception:
        # Method 2: download the object and upload it under the new key
        try:
            print(" 尝试备用方法重命名MinIO图片...")
            response = minio_client.get_object(bucket_name, old_object_name)
            try:
                object_data = response.read()
            finally:
                response.close()
                response.release_conn()
            minio_client.put_object(
                bucket_name,
                new_object_name,
                io.BytesIO(object_data),  # put_object needs a stream
                len(object_data)
            )
            print(f" ✓ MinIO图片重命名完成(备用方法): {old_base} -> {new_base}")
        except Exception as alt_exc:
            print(f" ✗ MinIO图片重命名失败 {old_object_name}: {alt_exc}")
            return False
    # The new copy exists now, so the original can be removed
    try:
        minio_client.remove_object(bucket_name, old_object_name)
    except S3Error as rm_exc:
        print(f" ✗ 删除MinIO原对象失败 {old_object_name}: {rm_exc}")
    return True


def _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename):
    """Replace old_filename with new_filename inside one markdown object and verify the upload."""
    print(f" → 正在处理MinIO markdown: {os.path.basename(md_file)}")
    response = minio_client.get_object(bucket_name, md_file)
    try:
        content = response.read().decode('utf-8')
    finally:
        response.close()
        response.release_conn()
    # Debug aid: does the current content mention the old name at all?
    print(f" 原内容包含旧文件名: {old_filename in content}")
    new_content = content.replace(old_filename, new_filename)
    if new_content == content:
        print(" ⚠ 警告: 内容未发生变化,可能未找到匹配项")
        # Nothing to write: skip the delete/re-upload cycle, which could only
        # put the file at risk without changing it.
        return
    print(f" 内容已替换: {old_filename} -> {new_filename}")
    content_bytes = new_content.encode('utf-8')
    # Delete first, then upload (kept from the original, whose comment says
    # some MinIO versions need it).
    # NOTE(review): if the upload below fails, the markdown object is gone;
    # confirm whether a plain overwrite would be sufficient.
    try:
        minio_client.remove_object(bucket_name, md_file)
        print(" 已删除旧markdown文件")
    except Exception as rm_exc:
        print(f" 删除旧markdown文件时出错继续执行: {rm_exc}")
    minio_client.put_object(
        bucket_name,
        md_file,
        io.BytesIO(content_bytes),
        length=len(content_bytes),
        content_type='text/plain; charset=utf-8'
    )
    # Read the object back to check that the new name really landed
    verify_response = minio_client.get_object(bucket_name, md_file)
    try:
        verify_content = verify_response.read().decode('utf-8')
    finally:
        verify_response.close()
        verify_response.release_conn()
    if new_filename in verify_content:
        print(f" ✓ MinIO markdown更新完成并验证成功: {os.path.basename(md_file)}")
    else:
        print(" ✗ MinIO markdown验证失败: 更新后仍未包含新文件名")


def execute_single_unit_changes(
        minio_client, bucket_name, old_object_name, new_object_name,
        minio_affected_markdowns, local_affected_markdowns,
        local_affected_images, old_filename, new_filename
):
    """
    Apply one confirmed rename everywhere it is visible.

    Steps, in order: rename the image object in MinIO, update the markdown
    objects in MinIO, rename the local image files, update the local markdown
    files. If the MinIO rename fails, none of the later steps run.

    :param minio_client: MinIO client
    :param bucket_name: bucket holding the image and markdown objects
    :param old_object_name: current object key of the image
    :param new_object_name: object key the image is moved to
    :param minio_affected_markdowns: markdown object keys referencing the image
    :param local_affected_markdowns: local markdown paths referencing the image
    :param local_affected_images: local image paths with the same base name
    :param old_filename: current image file name
    :param new_filename: new image file name
    """
    # A rename onto itself would copy the object and then delete that very
    # key, destroying the image. Refuse it outright.
    if old_object_name == new_object_name:
        print(f" ✗ 新旧对象名相同,已跳过: {old_object_name}")
        return
    try:
        # 1. Rename the image object in MinIO
        if not _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
            # Both copy methods failed: leave everything else untouched
            return
        # 2. Update the markdown objects in MinIO
        for md_file in minio_affected_markdowns:
            try:
                _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename)
            except S3Error as exc:
                print(f" ✗ MinIO markdown更新失败 {md_file}: {exc}")
            except Exception as exc:
                print(f" ✗ MinIO markdown更新失败未知错误 {md_file}: {exc}")
        # 3. Rename the local image files
        for img_file in local_affected_images:
            try:
                new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                os.rename(img_file, new_img_file)
                print(f" ✓ 本地图片重命名完成: {os.path.basename(img_file)} -> {os.path.basename(new_filename)}")
            except Exception as exc:
                print(f" ✗ 本地图片重命名失败 {img_file}: {exc}")
        # 4. Update the local markdown files
        for md_file in local_affected_markdowns:
            try:
                with open(md_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                content = content.replace(old_filename, new_filename)
                with open(md_file, 'w', encoding='utf-8') as f:
                    f.write(content)
                print(f" ✓ 本地markdown更新完成: {os.path.basename(md_file)}")
            except Exception as exc:
                print(f" ✗ 本地markdown更新失败 {md_file}: {exc}")
    except Exception as exc:
        print(f" ✗ 执行单元修改时发生错误: {exc}")
def find_minio_markdown_files(minio_client, bucket_name, base_path):
    """
    Collect the keys of all markdown objects below ``base_path`` in MinIO.

    An object counts as markdown when its key ends in '_md.txt' or '_md.md'.

    :param minio_client: MinIO client used for the recursive listing
    :param bucket_name: bucket to search
    :param base_path: key prefix to search under
    :return: list of matching object keys (whatever was collected before an
             S3Error, if one occurs)
    """
    markdown_suffixes = ('_md.txt', '_md.md')
    found = []
    try:
        listing = minio_client.list_objects(bucket_name, prefix=base_path, recursive=True)
        for entry in listing:
            key = entry.object_name
            if key.endswith(markdown_suffixes):
                found.append(key)
    except S3Error as exc:
        print(f"查找MinIO中markdown文件时发生错误: {exc}")
    print(f"找到 {len(found)} 个MinIO中的markdown文件")
    return found
def find_local_markdown_files(local_base_path):
    """
    Collect the paths of all markdown files below ``local_base_path``.

    A file counts as markdown when its name ends in '_md.txt' or '_md.md'.

    :param local_base_path: directory tree to search
    :return: list of matching file paths
    """
    markdown_suffixes = ('_md.txt', '_md.md')
    found = []
    try:
        for folder, _subdirs, names in os.walk(local_base_path):
            for name in names:
                if name.endswith(markdown_suffixes):
                    found.append(os.path.join(folder, name))
    except Exception as exc:
        print(f"查找本地markdown文件时发生错误: {exc}")
    print(f"找到 {len(found)} 个本地markdown文件")
    return found
def confirm_single_change(change_type):
    """
    Ask the user to confirm a single change, repeating the question until the
    answer is one of y / yes / n / no (case-insensitive).

    :param change_type: description of the change, inserted into the prompt
    :return: True for y/yes, False for n/no
    """
    prompt = f"是否确认此{change_type}? (y/n): "
    verdicts = {'y': True, 'yes': True, 'n': False, 'no': False}
    while True:
        reply = input(prompt).strip().lower()
        if reply in verdicts:
            return verdicts[reply]
        print("请输入 'y''n'")
def show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Print a summary of all recorded changes and ask the user for a y/n answer.

    :param rename_mappings: {old object key: new object key} for MinIO images
    :param minio_markdown_updates: {markdown object key: update record}
    :param local_markdown_updates: {local path: update record}; records tagged
           'markdown' or 'image' via their 'type' key, untagged records are
           classified by file extension
    :return: True when the user answers y/yes, False for n/no
    """
    separator = "=" * 80

    def rename_line(index, path, record):
        # "<n>. <file>: <old> -> <new>" line shared by all markdown entries
        return f" {index}. {os.path.basename(path)}: {record['old_filename']} -> {record['new_filename']}"

    print("\n" + separator)
    print("更改摘要:")
    print(separator)

    # Section 1: image renames in MinIO
    print(f"\n1. MinIO图片文件重命名 ({len(rename_mappings)} 个文件):")
    for index, (old_path, new_path) in enumerate(rename_mappings.items(), start=1):
        print(f" {index}. {os.path.basename(old_path)} -> {os.path.basename(new_path)}")

    # Section 2: markdown objects in MinIO
    print(f"\n2. MinIO Markdown文件更新 ({len(minio_markdown_updates)} 个文件):")
    for index, (md_path, record) in enumerate(minio_markdown_updates.items(), start=1):
        print(rename_line(index, md_path, record))

    # Section 3: local files (markdown updates and image renames share one dict)
    print(f"\n3. 本地Markdown文件更新 ({len(local_markdown_updates)} 个文件):")
    for index, (path, record) in enumerate(local_markdown_updates.items(), start=1):
        entry_type = record.get('type')
        if entry_type == 'markdown':
            print(rename_line(index, path, record))
        elif entry_type == 'image':
            print(f" {index}. {os.path.basename(path)} -> {record['new_filename']}: 图片重命名")
        elif path.endswith('.txt') or path.endswith('.md'):
            # Untagged (older-format) record that looks like a markdown file
            print(rename_line(index, path, record))
        else:
            print(f" {index}. {os.path.basename(path)}: 图片重命名")

    print("\n" + separator)
    print(
        f"总计: {len(rename_mappings)} 个MinIO图片文件将被重命名, {len(minio_markdown_updates)} 个MinIO markdown文件将被更新, {len(local_markdown_updates)} 个本地文件将被更新")
    print(separator)

    verdicts = {'y': True, 'yes': True, 'n': False, 'no': False}
    while True:
        reply = input("\n是否执行这些更改? (y/n): ").strip().lower()
        if reply in verdicts:
            return verdicts[reply]
        print("请输入 'y''n'")
def execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Announce that all changes are complete.

    The changes themselves were already applied unit by unit; this function
    only prints the completion message and ignores its arguments.
    """
    completion_message = "\n所有更改已完成!"
    print(completion_message)
def is_image_file(filename):
    """
    Tell whether ``filename`` has an image extension (case-insensitive).

    :param filename: file name or path to check
    :return: True for .jpg, .jpeg, .png, .gif and .bmp files, else False
    """
    # The name is lowercased before comparing, so only lower-case extensions
    # are needed; str.endswith accepts a tuple of candidates.
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    return filename.lower().endswith(image_extensions)
def main():
    """
    Script entry point.

    The date suffix is taken from the first command-line argument (a leading
    '_' is added when missing) and defaults to '_1114'. The MinIO endpoint and
    credentials can be overridden with the MINIO_ENDPOINT, MINIO_ACCESS_KEY
    and MINIO_SECRET_KEY environment variables; without them the previously
    hard-coded values are used.
    """
    import sys

    # Date suffix from the command line, or the default
    if len(sys.argv) > 1:
        date_suffix = sys.argv[1]
        if not date_suffix.startswith('_'):
            date_suffix = '_' + date_suffix
    else:
        date_suffix = '_1114'  # default
    print(f"使用日期后缀: {date_suffix}")
    print("提示: 可以通过命令行参数修改后缀,例如: python script.py _1203")
    print("=" * 80)
    # Initialise the MinIO client.
    # NOTE(review): the fallback values below are real-looking credentials
    # committed to source; prefer supplying them via the environment and
    # removing the defaults once deployments are updated.
    minio_client = Minio(
        os.environ.get("MINIO_ENDPOINT", "1.13.185.116:9000"),
        access_key=os.environ.get("MINIO_ACCESS_KEY", "rag_flow"),
        secret_key=os.environ.get("MINIO_SECRET_KEY", "infini_rag_flow"),
        secure=False
    )
    # Configuration
    bucket_name = "exhibit-photo"  # bucket to process
    base_path = "bj_yuanlin/category"  # base key prefix inside MinIO
    season_dirs = ["chun_pic_md", "xia_pic_md", "qiu_pic_md", "dong_pic_md"]  # season directories
    local_base_path = "."  # root of the local mirror
    print("\n开始扫描MinIO和本地文件系统...")
    print(f"MinIO基础路径: {base_path}")
    print(f"季节目录: {', '.join(season_dirs)}")
    print(f"本地基础路径: {local_base_path}")
    print(f"日期后缀: {date_suffix}")
    try:
        rename_mappings = process_minio_and_local_objects(
            minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix
        )
        if rename_mappings:
            print(f"\n处理完成!共重命名 {len(rename_mappings)} 个MinIO图片文件")
        else:
            print(f"\n未执行任何更改")
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback
        print(f"处理过程中出现错误: {e}")
# Run the interactive rename workflow only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()