# Source file: museum_admin/add_1114_to_minio_file.py
# NOTE: the code viewer this file was copied from reported that it contains
# ambiguous Unicode characters (characters that can be confused with others).
# Review string literals carefully before editing them.

# -*- coding: utf-8 -*-
import os
import re
import io
from minio import Minio
from minio.error import S3Error
def process_minio_and_local_objects(minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix):
    """
    Add a date suffix to image objects in MinIO and on the local file system,
    and update the markdown files that reference them.

    The markdown and image indexes are built once up front, then every season
    directory is walked with ``process_directory_recursive``. That walk asks
    for confirmation per image and applies each confirmed change immediately,
    so the summary prompt shown at the end cannot undo anything.

    :param minio_client: MinIO client used for all object operations
    :param bucket_name: bucket that holds the images and markdown files
    :param base_path: base prefix inside the bucket
    :param season_dirs: iterable of season directory names under ``base_path``
    :param local_base_path: root of the local tree that is scanned
    :param date_suffix: date suffix, e.g. '_1114'
    :return: mapping {old object key: new object key} of the renames that were
        confirmed during the walk (empty when nothing was renamed)
    """
    # All rename mappings {old object key: new object key}.
    rename_mappings = {}
    # Markdown files (and local images) that were recorded for an update.
    minio_markdown_updates = {}
    local_markdown_updates = {}

    # Index markdown contents first so references can be looked up by file name.
    minio_markdown_index = build_minio_markdown_index(minio_client, bucket_name, base_path)
    local_markdown_index = build_local_markdown_index(local_base_path)
    # Index local image files by file name.
    local_image_index = build_local_image_index(local_base_path)

    for season_dir in season_dirs:
        print(f"\n处理季节目录: {season_dir} {base_path}")
        full_path = f"{base_path}/{season_dir}/"
        # Recursively handle every sub-directory and image of this season.
        season_rename_mappings = process_directory_recursive(
            minio_client, bucket_name, full_path, season_dir, base_path,
            minio_markdown_index, local_markdown_index, local_image_index,
            minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
        )
        rename_mappings.update(season_rename_mappings)

    # Show the summary and ask for the final confirmation.
    if show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
        execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates)
    elif rename_mappings:
        # Confirmed items were already applied one by one during the walk;
        # claiming that nothing changed would be false.
        print("\n已确认的更改已在逐项确认时执行,此处取消不会撤销这些更改。")
    else:
        print("\n操作已取消,未执行任何更改。")
    # Always report what was really renamed so the caller's summary is accurate.
    return rename_mappings
def build_minio_markdown_index(minio_client, bucket_name, base_path):
    """
    Build an index of the markdown files stored in MinIO, keyed by the image
    file names they reference.

    :param minio_client: MinIO client used to list and download objects
    :param bucket_name: bucket to scan
    :param base_path: prefix under which markdown files are searched
    :return: dict {image file name: [markdown object keys referencing it]};
        each markdown file appears at most once per image
    """
    print("构建MinIO中markdown文件索引...")
    markdown_index = {}
    markdown_files = find_minio_markdown_files(minio_client, bucket_name, base_path)
    for md_file in markdown_files:
        try:
            response = minio_client.get_object(bucket_name, md_file)
            try:
                content = response.read().decode('utf-8')
            finally:
                # Release the connection even when reading or decoding fails.
                response.close()
                response.release_conn()
            # dict.fromkeys de-duplicates while keeping first-seen order, so a
            # file that references one image several times is listed only once.
            for image_ref in dict.fromkeys(extract_image_references(content)):
                markdown_index.setdefault(image_ref, []).append(md_file)
        except (S3Error, UnicodeDecodeError) as exc:
            # One unreadable file must not abort the whole index build.
            print(f"处理MinIO中markdown文件 {md_file} 时发生错误: {exc}")
    print(f"MinIO索引构建完成共找到 {len(markdown_index)} 个图片引用")
    return markdown_index
def build_local_markdown_index(local_base_path):
    """
    Build an index of the local markdown files, keyed by the image file names
    they reference.

    :param local_base_path: root directory that is searched for markdown files
    :return: dict {image file name: [markdown file paths referencing it]};
        each markdown file appears at most once per image
    """
    print("构建本地markdown文件索引...")
    markdown_index = {}
    markdown_files = find_local_markdown_files(local_base_path)
    for md_file in markdown_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
            # dict.fromkeys de-duplicates while keeping first-seen order, so a
            # file that references one image several times is listed only once.
            for image_ref in dict.fromkeys(extract_image_references(content)):
                markdown_index.setdefault(image_ref, []).append(md_file)
        except Exception as exc:
            # Best effort: report the unreadable file and keep indexing.
            print(f"处理本地markdown文件 {md_file} 时发生错误: {exc}")
    print(f"本地索引构建完成,共找到 {len(markdown_index)} 个图片引用")
    return markdown_index
def build_local_image_index(local_base_path):
    """
    Index the local image files by file name.

    :param local_base_path: root directory that is walked recursively
    :return: dict {file name: [full paths of the files carrying that name]}
    """
    print("构建本地图片文件索引...")
    image_index = {}
    for dirpath, _subdirs, entries in os.walk(local_base_path):
        for entry in entries:
            if not is_image_file(entry):
                continue
            key = os.path.basename(entry)
            image_index.setdefault(key, []).append(os.path.join(dirpath, entry))
    # The reported number is the count of distinct file names in the index.
    print(f"本地图片索引构建完成,共找到 {len(image_index)} 个图片文件")
    return image_index
def extract_image_references(content):
    """
    Extract the file names of all images referenced in markdown content.

    Matches the markdown image syntax ``![alt](url)``; the alt text may be
    empty. Only the last path component of each URL is kept, and references
    whose URL ends with a slash (empty file name) are skipped.

    :param content: markdown text
    :return: list of referenced image file names, in order of appearance
        (duplicates are preserved)
    """
    # Accept any alt text that contains no ']' so that captioned images are
    # found as well as the bare ![](url) form.
    pattern = r'!\[[^\]]*\]\(([^)]+)\)'
    names = (os.path.basename(target) for target in re.findall(pattern, content))
    return [name for name in names if name]
def _target_image_filename(filename, date_suffix):
    """Return the file name carrying ``date_suffix``, or None when it already does."""
    name_part, ext_part = os.path.splitext(filename)
    # Check for the target suffix FIRST: a name such as 'a_1114' also matches
    # the generic 4-digit pattern below and would otherwise be "renamed" onto
    # itself.
    if name_part.endswith(date_suffix):
        return None
    existing = re.search(r'_\d{4}$', name_part)
    if existing:
        # Drop the old 4-digit suffix by slicing, so the new suffix is never
        # interpreted as a regex replacement template.
        name_part = name_part[:existing.start()]
    return f"{name_part}{date_suffix}{ext_part}"


def process_directory_recursive(
        minio_client, bucket_name, current_path, season_dir, base_path,
        minio_markdown_index, local_markdown_index, local_image_index,
        minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
):
    """
    Recursively process a MinIO directory and all of its sub-directories.

    For every image object whose name does not yet end with ``date_suffix``
    the planned rename and the affected markdown files / local images are
    printed, the user is asked to confirm, and a confirmed change is applied
    immediately through ``execute_single_unit_changes``.

    :param minio_client: MinIO client
    :param bucket_name: bucket being processed
    :param current_path: prefix (ending with '/') of the directory to list
    :param season_dir: season directory name, used to shorten printed paths
    :param base_path: base prefix inside the bucket
    :param minio_markdown_index: {image file name: [MinIO markdown keys]}
    :param local_markdown_index: {image file name: [local markdown paths]}
    :param local_image_index: {image file name: [local image paths]}
    :param minio_markdown_updates: dict filled with recorded MinIO markdown updates
    :param local_markdown_updates: dict filled with recorded local markdown
        updates and local image renames
    :param local_base_path: root of the local tree, used for relative paths
    :param date_suffix: date suffix, e.g. '_1114'
    :return: dict {old object key: new object key} of the confirmed renames
    """
    rename_mappings = {}
    season_prefix = f"{base_path}/{season_dir}/"
    try:
        # Materialise the listing: objects are renamed while we iterate, and a
        # lazily paginated listing could otherwise pick up the new keys.
        objects = list(minio_client.list_objects(bucket_name, prefix=current_path, recursive=False))
        for obj in objects:
            object_name = obj.object_name

            if object_name.endswith('/'):
                # A marker equal to the prefix itself would recurse forever.
                if object_name == current_path:
                    continue
                rename_mappings.update(process_directory_recursive(
                    minio_client, bucket_name, object_name, season_dir, base_path,
                    minio_markdown_index, local_markdown_index, local_image_index,
                    minio_markdown_updates, local_markdown_updates, local_base_path, date_suffix
                ))
                continue

            if not is_image_file(object_name):
                continue

            filename = os.path.basename(object_name)
            new_filename = _target_image_filename(filename, date_suffix)
            rel_path = object_name.replace(season_prefix, "")
            if new_filename is None:
                # Already carries the target suffix: nothing to do.
                print(f" 已是目标后缀{date_suffix}: {rel_path}")
                continue

            # Build the new full object key in the same directory.
            dir_path = os.path.dirname(object_name)
            new_object_name = f"{dir_path}/{new_filename}" if dir_path else new_filename
            new_rel_path = new_object_name.replace(season_prefix, "")

            # Markdown files referencing this image, and same-named local images.
            minio_affected_markdowns = minio_markdown_index.get(filename, [])
            local_affected_markdowns = local_markdown_index.get(filename, [])
            local_affected_images = local_image_index.get(filename, [])

            print(f"\n{'=' * 80}")
            print(f"发现需要重命名: {rel_path} -> {new_rel_path}")
            print(f"{'=' * 80}")

            # Preview the affected MinIO markdown files.
            if minio_affected_markdowns:
                print(f"MinIO中此图片被以下 {len(minio_affected_markdowns)} 个markdown文件引用:")
                for md_file in minio_affected_markdowns:
                    md_filename = os.path.basename(md_file)
                    print(f" - {md_filename}:")
                    show_markdown_changes(minio_client, bucket_name, md_file, filename, new_filename,
                                          "MinIO")

            # Preview the affected local markdown files.
            if local_affected_markdowns:
                print(f"本地此图片被以下 {len(local_affected_markdowns)} 个markdown文件引用:")
                for md_file in local_affected_markdowns:
                    md_filename = os.path.basename(md_file)
                    print(f" - {md_filename}:")
                    show_markdown_changes_local(md_file, filename, new_filename, "本地")

            # Preview the local image renames.
            if local_affected_images:
                print(f"本地有以下 {len(local_affected_images)} 个同名图片文件需要重命名:")
                for img_file in local_affected_images:
                    rel_img_path = os.path.relpath(img_file, local_base_path)
                    new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                    rel_new_img_path = os.path.relpath(new_img_file, local_base_path)
                    print(f" - {rel_img_path} -> {rel_new_img_path}")

            if not confirm_single_change("MinIO和本地文件修改"):
                print(f" ✗ 已取消修改: {rel_path}")
                continue

            rename_mappings[object_name] = new_object_name

            # Record the updates; only the first change per file is kept.
            for md_file in minio_affected_markdowns:
                if md_file not in minio_markdown_updates:
                    minio_markdown_updates[md_file] = {
                        'old_filename': filename,
                        'new_filename': new_filename
                    }
            for md_file in local_affected_markdowns:
                if md_file not in local_markdown_updates:
                    local_markdown_updates[md_file] = {
                        'type': 'markdown',
                        'old_filename': filename,
                        'new_filename': new_filename
                    }
            # Local image renames share the dict and are told apart by 'type'.
            for img_file in local_affected_images:
                new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                if img_file not in local_markdown_updates:
                    local_markdown_updates[img_file] = {
                        'type': 'image',
                        'old_filename': filename,
                        'new_filename': new_filename,
                        'new_path': new_img_file
                    }
            print(f" ✓ 已确认修改: {rel_path} -> {new_rel_path}")

            # Apply this unit right away.
            execute_single_unit_changes(
                minio_client, bucket_name, object_name, new_object_name,
                minio_affected_markdowns, local_affected_markdowns,
                local_affected_images, filename, new_filename
            )
    except S3Error as exc:
        print(f"处理目录 {current_path} 时发生错误: {exc}")
    return rename_mappings
def show_markdown_changes(minio_client, bucket_name, md_file, old_filename, new_filename, source):
    """
    Print the lines of a MinIO markdown file that a rename would change.

    For every line containing ``old_filename`` the 1-based line number, the
    current line and the line after replacement are printed. Nothing is
    modified.

    :param minio_client: MinIO client used to download the file
    :param bucket_name: bucket that holds the markdown file
    :param md_file: object key of the markdown file
    :param old_filename: image file name to look for
    :param new_filename: image file name it would be replaced with
    :param source: label used in the error message (e.g. "MinIO")
    """
    try:
        response = minio_client.get_object(bucket_name, md_file)
        try:
            content = response.read().decode('utf-8')
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()
            response.release_conn()
        for line_no, line in enumerate(content.split('\n'), start=1):
            if old_filename in line:
                print(f"{line_no}:")
                print(f" 原内容: {line}")
                print(f" 新内容: {line.replace(old_filename, new_filename)}")
    except (S3Error, UnicodeDecodeError) as exc:
        # A preview failure must not abort the surrounding directory scan.
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
def show_markdown_changes_local(md_file, old_filename, new_filename, source):
    """
    Print the lines of a local markdown file that a rename would change.

    :param md_file: path of the markdown file
    :param old_filename: image file name to look for
    :param new_filename: image file name it would be replaced with
    :param source: label used in the error message
    """
    try:
        with open(md_file, 'r', encoding='utf-8') as handle:
            text = handle.read()
        for line_no, before in enumerate(text.split('\n'), start=1):
            if old_filename not in before:
                continue
            after = before.replace(old_filename, new_filename)
            # 1-based line number, then the line before and after replacement.
            print(f"{line_no}:")
            print(f" 原内容: {before}")
            print(f" 新内容: {after}")
    except Exception as exc:
        print(f" 获取{source}markdown文件内容时发生错误: {exc}")
def _read_minio_text(minio_client, bucket_name, object_name):
    """Download an object and return it decoded as UTF-8, always releasing the connection."""
    response = minio_client.get_object(bucket_name, object_name)
    try:
        return response.read().decode('utf-8')
    finally:
        response.close()
        response.release_conn()


def _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
    """Copy an object to a new key and delete the old one; return False if the copy failed."""
    old_base = os.path.basename(old_object_name)
    new_base = os.path.basename(new_object_name)
    try:
        # Method 1: server-side copy. The source is passed as a CopySource
        # object rather than a plain dict.
        from minio.commonconfig import CopySource
        minio_client.copy_object(
            bucket_name, new_object_name, CopySource(bucket_name, old_object_name)
        )
        print(f" ✓ MinIO图片重命名完成: {old_base} -> {new_base}")
    except Exception:
        # Method 2: download the object and upload it under the new key.
        try:
            print(f" 尝试备用方法重命名MinIO图片...")
            response = minio_client.get_object(bucket_name, old_object_name)
            try:
                object_data = response.read()
            finally:
                response.close()
                response.release_conn()
            minio_client.put_object(
                bucket_name,
                new_object_name,
                io.BytesIO(object_data),  # put_object is given a stream plus its length
                len(object_data)
            )
            print(f" ✓ MinIO图片重命名完成(备用方法): {old_base} -> {new_base}")
        except Exception as alt_exc:
            print(f" ✗ MinIO图片重命名失败 {old_object_name}: {alt_exc}")
            return False
    # The copy exists now; remove the original object.
    try:
        minio_client.remove_object(bucket_name, old_object_name)
    except S3Error as rm_exc:
        print(f" ✗ 删除MinIO原对象失败 {old_object_name}: {rm_exc}")
    return True


def _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename):
    """Rewrite one MinIO markdown file so it references the new image name."""
    try:
        print(f" → 正在处理MinIO markdown: {os.path.basename(md_file)}")
        content = _read_minio_text(minio_client, bucket_name, md_file)
        print(f" 原内容包含旧文件名: {old_filename in content}")
        new_content = content.replace(old_filename, new_filename)
        if new_content == content:
            # Nothing to write: leave the stored object untouched.
            print(f" ⚠ 警告: 内容未发生变化,可能未找到匹配项")
            return
        print(f" 内容已替换: {old_filename} -> {new_filename}")
        content_bytes = new_content.encode('utf-8')
        # Upload over the existing key instead of deleting it first, so a
        # failed upload cannot leave the bucket without the markdown file.
        minio_client.put_object(
            bucket_name,
            md_file,
            io.BytesIO(content_bytes),
            length=len(content_bytes),
            content_type='text/plain; charset=utf-8'
        )
        # Read the object back to verify the upload.
        verify_content = _read_minio_text(minio_client, bucket_name, md_file)
        if new_filename in verify_content:
            print(f" ✓ MinIO markdown更新完成并验证成功: {os.path.basename(md_file)}")
        else:
            print(f" ✗ MinIO markdown验证失败: 更新后仍未包含新文件名")
    except S3Error as exc:
        print(f" ✗ MinIO markdown更新失败 {md_file}: {exc}")
    except Exception as exc:
        print(f" ✗ MinIO markdown更新失败未知错误 {md_file}: {exc}")


def execute_single_unit_changes(
        minio_client, bucket_name, old_object_name, new_object_name,
        minio_affected_markdowns, local_affected_markdowns,
        local_affected_images, old_filename, new_filename
):
    """
    Apply the changes belonging to a single image rename.

    Steps, in order: rename the image object in MinIO, update the MinIO
    markdown files, rename the same-named local image files, update the local
    markdown files. If the MinIO rename fails, the remaining steps are
    skipped. Failures of individual files are reported and do not stop the
    remaining files.

    :param minio_client: MinIO client
    :param bucket_name: bucket that holds the objects
    :param old_object_name: current object key of the image
    :param new_object_name: object key the image is renamed to
    :param minio_affected_markdowns: MinIO markdown keys to rewrite
    :param local_affected_markdowns: local markdown paths to rewrite
    :param local_affected_images: local image paths to rename
    :param old_filename: image file name to replace inside markdown
    :param new_filename: new image file name
    """
    # Copying an object onto itself and then deleting the "old" key would
    # destroy the object, so refuse no-op renames outright.
    if old_object_name == new_object_name or old_filename == new_filename:
        print(f" ✗ 新旧名称相同,已跳过: {old_object_name}")
        return
    try:
        # 1. Rename the image object in MinIO.
        if not _rename_minio_object(minio_client, bucket_name, old_object_name, new_object_name):
            return

        # 2. Update the markdown files stored in MinIO.
        for md_file in minio_affected_markdowns:
            _update_minio_markdown(minio_client, bucket_name, md_file, old_filename, new_filename)

        # 3. Rename the local image files.
        for img_file in local_affected_images:
            try:
                new_img_file = os.path.join(os.path.dirname(img_file), new_filename)
                os.rename(img_file, new_img_file)
                print(f" ✓ 本地图片重命名完成: {os.path.basename(img_file)} -> {os.path.basename(new_filename)}")
            except Exception as exc:
                print(f" ✗ 本地图片重命名失败 {img_file}: {exc}")

        # 4. Update the local markdown files.
        for md_file in local_affected_markdowns:
            try:
                with open(md_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                content = content.replace(old_filename, new_filename)
                with open(md_file, 'w', encoding='utf-8') as f:
                    f.write(content)
                print(f" ✓ 本地markdown更新完成: {os.path.basename(md_file)}")
            except Exception as exc:
                print(f" ✗ 本地markdown更新失败 {md_file}: {exc}")
    except Exception as exc:
        print(f" ✗ 执行单元修改时发生错误: {exc}")
def find_minio_markdown_files(minio_client, bucket_name, base_path):
    """
    Collect the keys of all markdown files stored in MinIO under a prefix.

    An object counts as markdown when its key ends with '_md.txt' or '_md.md'.

    :param minio_client: MinIO client used for the recursive listing
    :param bucket_name: bucket to list
    :param base_path: prefix to list under
    :return: list of matching object keys (whatever was collected before a
        listing error, if one occurs)
    """
    suffixes = ('_md.txt', '_md.md')
    markdown_files = []
    try:
        for entry in minio_client.list_objects(bucket_name, prefix=base_path, recursive=True):
            key = entry.object_name
            if key.endswith(suffixes):
                markdown_files.append(key)
    except S3Error as exc:
        print(f"查找MinIO中markdown文件时发生错误: {exc}")
    print(f"找到 {len(markdown_files)} 个MinIO中的markdown文件")
    return markdown_files
def find_local_markdown_files(local_base_path):
    """
    Collect the paths of all local markdown files below a directory.

    A file counts as markdown when its name ends with '_md.txt' or '_md.md'.

    :param local_base_path: root directory that is walked recursively
    :return: list of matching file paths
    """
    suffixes = ('_md.txt', '_md.md')
    markdown_files = []
    try:
        for dirpath, _subdirs, entries in os.walk(local_base_path):
            for entry in entries:
                if entry.endswith(suffixes):
                    markdown_files.append(os.path.join(dirpath, entry))
    except Exception as exc:
        print(f"查找本地markdown文件时发生错误: {exc}")
    print(f"找到 {len(markdown_files)} 个本地markdown文件")
    return markdown_files
def confirm_single_change(change_type):
    """
    Ask the user to confirm a single change.

    Keeps prompting until the answer (trimmed, case-insensitive) is one of
    'y', 'yes', 'n' or 'no'.

    :param change_type: description of the change, inserted into the prompt
    :return: True for 'y'/'yes', False for 'n'/'no'
    """
    accepted = ('y', 'yes')
    rejected = ('n', 'no')
    while True:
        answer = input(f"是否确认此{change_type}? (y/n): ").strip().lower()
        if answer in accepted:
            return True
        if answer in rejected:
            return False
        print("请输入 'y''n'")
def show_changes_and_confirm(rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Print a summary of all recorded changes and ask the user to confirm.

    :param rename_mappings: {old object key: new object key}
    :param minio_markdown_updates: {markdown key: update info dict}
    :param local_markdown_updates: {local path: update info dict}; entries are
        distinguished by their optional 'type' value ('markdown' or 'image')
    :return: True when the user answers 'y'/'yes', False for 'n'/'no'
    """
    separator = "=" * 80
    print("\n" + separator)
    print("更改摘要:")
    print(separator)

    # Image renames.
    print(f"\n1. MinIO图片文件重命名 ({len(rename_mappings)} 个文件):")
    for idx, (old_path, new_path) in enumerate(rename_mappings.items(), start=1):
        print(f" {idx}. {os.path.basename(old_path)} -> {os.path.basename(new_path)}")

    # MinIO markdown updates.
    print(f"\n2. MinIO Markdown文件更新 ({len(minio_markdown_updates)} 个文件):")
    for idx, (md_path, update_info) in enumerate(minio_markdown_updates.items(), start=1):
        print(
            f" {idx}. {os.path.basename(md_path)}: {update_info['old_filename']} -> {update_info['new_filename']}")

    # Local updates (markdown files and image renames share one dict).
    print(f"\n3. 本地Markdown文件更新 ({len(local_markdown_updates)} 个文件):")
    for idx, (md_path, update_info) in enumerate(local_markdown_updates.items(), start=1):
        base = os.path.basename(md_path)
        entry_type = update_info.get('type')
        if entry_type == 'markdown':
            print(
                f" {idx}. {base}: {update_info['old_filename']} -> {update_info['new_filename']}")
        elif entry_type == 'image':
            # Local image file.
            print(f" {idx}. {base} -> {update_info['new_filename']}: 图片重命名")
        elif md_path.endswith(('.txt', '.md')):
            # Entry without a 'type': decide by file extension.
            print(
                f" {idx}. {base}: {update_info['old_filename']} -> {update_info['new_filename']}")
        else:
            print(f" {idx}. {base}: 图片重命名")

    print("\n" + separator)
    print(
        f"总计: {len(rename_mappings)} 个MinIO图片文件将被重命名, {len(minio_markdown_updates)} 个MinIO markdown文件将被更新, {len(local_markdown_updates)} 个本地文件将被更新")
    print(separator)

    # Keep asking until a recognised answer is given.
    while True:
        answer = input("\n是否执行这些更改? (y/n): ").strip().lower()
        if answer in ('y', 'yes'):
            return True
        if answer in ('n', 'no'):
            return False
        print("请输入 'y''n'")
def execute_changes(minio_client, bucket_name, rename_mappings, minio_markdown_updates, local_markdown_updates):
    """
    Report that all changes are done.

    This function performs no work with its arguments; it only prints the
    completion message.
    """
    completion_message = "\n所有更改已完成!"
    print(completion_message)
def is_image_file(filename):
    """
    Tell whether a file name has an image extension.

    The check is case-insensitive and recognises .jpg, .jpeg, .png, .gif and
    .bmp.

    :param filename: file name or path
    :return: True when the name ends with one of the image extensions
    """
    lowered = filename.lower()
    return lowered.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp'))
def main():
    """
    Entry point: read the date suffix from the command line, connect to MinIO
    and run the interactive rename workflow.

    The first command-line argument, if given, is used as the date suffix; a
    leading underscore is added when missing. Without an argument the suffix
    defaults to '_1114'.

    The MinIO endpoint and credentials can be overridden with the environment
    variables MINIO_ENDPOINT, MINIO_ACCESS_KEY and MINIO_SECRET_KEY; when they
    are unset the previous built-in values are used.
    """
    import sys

    # Date suffix from the command line.
    if len(sys.argv) > 1:
        date_suffix = sys.argv[1]
        if not date_suffix.startswith('_'):
            date_suffix = '_' + date_suffix
    else:
        date_suffix = '_1114'  # default
    print(f"使用日期后缀: {date_suffix}")
    print("提示: 可以通过命令行参数修改后缀,例如: python script.py _1203")
    print("=" * 80)

    # NOTE(security): the fallback values below are credentials committed to
    # source control. Prefer supplying them through the environment and
    # rotate the committed ones.
    minio_client = Minio(
        os.environ.get("MINIO_ENDPOINT", "1.13.185.116:9000"),
        access_key=os.environ.get("MINIO_ACCESS_KEY", "rag_flow"),
        secret_key=os.environ.get("MINIO_SECRET_KEY", "infini_rag_flow"),
        secure=False
    )

    # Configuration.
    bucket_name = "exhibit-photo"  # bucket name
    base_path = "bj_yuanlin/category"  # base path inside MinIO
    season_dirs = ["chun_pic_md", "xia_pic_md", "qiu_pic_md", "dong_pic_md"]  # season directories
    local_base_path = "."  # local base path

    print("\n开始扫描MinIO和本地文件系统...")
    print(f"MinIO基础路径: {base_path}")
    print(f"季节目录: {', '.join(season_dirs)}")
    print(f"本地基础路径: {local_base_path}")
    print(f"日期后缀: {date_suffix}")
    try:
        rename_mappings = process_minio_and_local_objects(
            minio_client, bucket_name, base_path, season_dirs, local_base_path, date_suffix
        )
        if rename_mappings:
            print(f"\n处理完成!共重命名 {len(rename_mappings)} 个MinIO图片文件")
        else:
            print(f"\n未执行任何更改")
    except Exception as e:
        # Top-level boundary of the script: report the error instead of a traceback.
        print(f"处理过程中出现错误: {e}")
# Run the interactive rename workflow only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()