0.准备条件:文件夹为中文名,图片内含有多个下划线。
处理目标:处理成为一个
“公共标|逗号| 一个句子”的形式。
1.(统计中文,准备翻译)文件夹结构作为数据统计入excel
import os

import pandas as pd

# Extensions (lower-case) that mark a file as an image.
IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')


def find_image_folders(parent_directory):
    """Collect the names of all directories that directly contain an image.

    Walks ``parent_directory`` (itself included) top-down. A directory counts
    as soon as one of its files has an extension from ``IMAGE_SUFFIXES``.

    Args:
        parent_directory: Path of the directory tree to scan.

    Returns:
        A list of directory base names (not full paths), without duplicates,
        in the order they were first encountered.
    """
    image_folders = []
    seen = set()  # O(1) duplicate check; the list keeps discovery order
    for root, dirs, files in os.walk(parent_directory):
        # Sorting in place makes os.walk visit sub-directories in a
        # deterministic order, so the exported table is reproducible.
        dirs.sort()
        if not any(name.lower().endswith(IMAGE_SUFFIXES) for name in files):
            continue
        # abspath strips a trailing separator ("photos/" would otherwise give
        # an empty base name) and resolves "." to a real directory name.
        folder_name = os.path.basename(os.path.abspath(root))
        if folder_name not in seen:
            seen.add(folder_name)
            image_folders.append(folder_name)
    return image_folders


def create_excel_file(parent_directory, folder_names):
    """Write the folder names to an Excel sheet inside ``parent_directory``.

    Args:
        parent_directory: Directory in which the workbook is created.
        folder_names: Folder names to list, one per row.

    Returns:
        The path of the written workbook.
    """
    df = pd.DataFrame(folder_names, columns=["文件夹名称"])
    output_path = os.path.join(parent_directory, "文件夹名统计.xlsx")
    df.to_excel(output_path, index=False)
    return output_path


def main():
    """Ask for the parent directory and export its image-folder names."""
    parent_directory = input("请输入父目录的路径: ")
    image_folders = find_image_folders(parent_directory)
    excel_file_path = create_excel_file(parent_directory, image_folders)
    print(f"Excel文件已创建在: {excel_file_path}")


# Guarded so that importing this module does not prompt for input.
if __name__ == "__main__":
    main()
2.根据表格第一列的中文,批量粘贴进ChatGPT进行翻译,并将译文填入表格第二列。
3.在每个文件夹下建立txt,txt名根据excel的第二列数据(翻译完的英文)。
import os
from pathlib import Path


def create_text_file(folder_path, file_name):
    """Create (or truncate) an empty ``<file_name>.txt`` inside ``folder_path``.

    Args:
        folder_path: Directory that receives the file.
        file_name: File name without the ``.txt`` extension.

    Returns:
        The ``Path`` of the created file.
    """
    target = Path(folder_path) / f"{file_name}.txt"
    with open(target, 'w'):
        pass
    return target


def build_title_map(rows):
    """Map folder names to titles from ``(folder_name, title)`` row pairs.

    The two values are taken from the same row, so an empty cell can never
    shift the remaining titles onto the wrong folders. Rows with a missing
    folder name or a missing title are skipped; this also drops a header row
    whose second column is blank. When a folder name occurs more than once
    the first row wins.

    Args:
        rows: Iterable of row tuples; only the first two items are used.

    Returns:
        A dict ``{folder_name: title}`` with both values stripped strings.
    """
    title_by_folder = {}
    for row in rows:
        folder_name, title = (list(row) + [None, None])[:2]
        if folder_name is None or title is None:
            continue
        folder_name = str(folder_name).strip()
        title = str(title).strip()
        if folder_name and title:
            title_by_folder.setdefault(folder_name, title)
    return title_by_folder


def create_title_files(title_by_folder, parent_dir):
    """Create the title ``.txt`` file in every matching sub-directory.

    Args:
        title_by_folder: Mapping of directory base name to title.
        parent_dir: Root of the directory tree to search.

    Returns:
        A list with the paths of the files that were created.
    """
    created = []
    for root, dirs, _files in os.walk(parent_dir):
        for dir_name in dirs:
            title = title_by_folder.get(dir_name)
            if title is not None:
                created.append(create_text_file(Path(root) / dir_name, title))
    return created


def find_and_create_files(excel_path, parent_dir):
    """Read folder/title pairs from the workbook and create the txt files.

    Column A of the active sheet holds folder names and column B holds the
    title to use as the txt file name for that folder.

    Args:
        excel_path: Path of the Excel workbook.
        parent_dir: Root of the directory tree containing the folders.

    Returns:
        A list with the paths of the files that were created.
    """
    # Imported here so the helpers above stay usable without openpyxl.
    import openpyxl

    workbook = openpyxl.load_workbook(excel_path)
    sheet = workbook.active
    rows = sheet.iter_rows(min_col=1, max_col=2, values_only=True)
    return create_title_files(build_title_map(rows), parent_dir)


def main():
    """Ask for the workbook and parent directory, then create the files."""
    excel_path = input("请输入Excel表格的完整路径: ")
    parent_dir = input("请输入父目录的完整路径: ")
    find_and_create_files(excel_path, parent_dir)


if __name__ == "__main__":
    main()
4.根据txt名批量命名文件,删去乱码
import os
import re

UNWANTED_UNITS = ["undefined", "皮皮", "zly324"]
# ".webp" is included because the folder-statistics step also counts it.
IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]

# Compiled once; all three are used for every file.
_HASH_IN_NAME = re.compile(r'[a-f0-9]{32}')
_HASH_UNIT = re.compile(r'^[a-f0-9\-]{32,}$')
_UNIT_SEPARATOR = re.compile(r'[_]+')


def is_image_file(filename):
    """Return True when ``filename`` has one of ``IMAGE_EXTENSIONS``."""
    _, ext = os.path.splitext(filename)
    return ext.lower() in IMAGE_EXTENSIONS


def get_first_txt_title(path):
    """Return the stem of the alphabetically first ``.txt`` file in ``path``.

    The extension match is case-insensitive and the listing is sorted, so the
    result does not depend on the order the file system returns entries in.

    Returns:
        The file name without extension, or ``None`` when the directory has
        no txt file or cannot be listed.
    """
    try:
        names = sorted(os.listdir(path))
    except OSError as e:
        print(f"在'{path}'检查txt文件时发生错误:{e}")
        return None
    for name in names:
        stem, ext = os.path.splitext(name)
        if ext.lower() == ".txt":
            return stem
    return None


def generate_unique_filename(path, filename, ext):
    """Return a path in ``path`` for ``filename + ext`` that does not exist.

    When the plain name is taken, ``(1)``, ``(2)``, ... is appended to the
    stem until a free name is found.
    """
    new_file_path = os.path.join(path, filename + ext)
    counter = 1
    while os.path.exists(new_file_path):
        new_file_path = os.path.join(path, f"{filename}({counter}){ext}")
        counter += 1
    return new_file_path


def clean_image_stem(filename, prefix=None):
    """Strip the noise units from an underscore-separated image stem.

    Args:
        filename: File name without extension.
        prefix: Prefix a previous run may already have put in front. When the
            stem starts with ``prefix + "_"`` exactly that prefix is removed;
            otherwise the first underscore-separated unit is dropped.

    Returns:
        The remaining units joined with ``_``; an empty string when nothing
        descriptive is left.
    """
    if prefix and filename.startswith(f"{prefix}_"):
        # Already prefixed: cut the whole prefix, which may itself contain
        # underscores, instead of only its first unit.
        units = _UNIT_SEPARATOR.split(filename[len(prefix) + 1:])
    else:
        units = _UNIT_SEPARATOR.split(filename)[1:]
    units = [
        unit for unit in units
        if unit
        and unit not in UNWANTED_UNITS
        and not any(char.isdigit() for char in unit)
    ]
    # Trailing hash-like units first, then trailing units of <= 4 characters.
    while units and _HASH_UNIT.search(units[-1]):
        units.pop()
    while units and len(units[-1]) <= 4:
        units.pop()
    return '_'.join(units)


def rename_image_files(path, prefix):
    """Rename every image directly inside ``path`` to ``<prefix>_<cleaned>``.

    Names containing a 32-character hex run, names without an underscore and
    names with nothing left after cleaning become ``<prefix>_(<n>)``. Files
    that already carry their final name are left untouched, so the function
    can be run repeatedly. A failure on one file is reported and the
    remaining files are still processed.

    Args:
        path: Directory whose image files are renamed.
        prefix: Text placed in front of every new name; nothing happens when
            it is empty.
    """
    if not prefix:
        print(f"在'{path}'中未找到txt文件或txt文件为空,跳过重命名操作。")
        return

    try:
        files = sorted(
            f for f in os.listdir(path)
            if is_image_file(f) and os.path.isfile(os.path.join(path, f))
        )
    except OSError as e:
        print(f"在'{path}'中重命名过程中发生错误:{e}")
        return

    already_numbered = re.compile(re.escape(prefix) + r'_\(\d+\)')
    counter = 1
    failed = False
    for file in files:
        filename, ext = os.path.splitext(file)
        if already_numbered.fullmatch(filename):
            continue  # numbered by an earlier run

        is_junk = _HASH_IN_NAME.search(filename) or '_' not in filename
        renamed = "" if is_junk else clean_image_stem(filename, prefix)
        if not renamed:
            # Nothing descriptive left: number the file instead of
            # producing a bare "<prefix>_".
            renamed = f"({counter})"
            counter += 1
        renamed = f"{prefix}_{renamed}"

        if renamed == filename:
            # The uniqueness check would collide with the file itself and
            # append a spurious "(1)".
            continue

        new_file_path = generate_unique_filename(path, renamed, ext)
        try:
            os.rename(os.path.join(path, file), new_file_path)
        except OSError as e:
            failed = True
            print(f"在'{path}'中重命名过程中发生错误:{e}")

    if not failed:
        print(f"在'{path}'中重命名完成。")


def rename_images_in_subfolders(root_path):
    """Rename the images of every directory under ``root_path`` that has a txt.

    The stem of the directory's first txt file is used as the name prefix;
    directories without a txt file are skipped.
    """
    for subdir, _dirs, _files in os.walk(root_path):
        prefix = get_first_txt_title(subdir)
        if prefix:
            rename_image_files(subdir, prefix)


def main():
    """Ask for the root folder and rename the images below it."""
    root_path = input("请输入总文件夹地址: ")
    rename_images_in_subfolders(root_path)


if __name__ == "__main__":
    main()
5.根据txt名批量命名文件,删去乱码
import os
import re

UNWANTED_UNITS = ["undefined", "皮皮", "zly324"]
# ".webp" is included because the folder-statistics step also counts it.
IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]

# Compiled once; all three are used for every file.
_HASH_IN_NAME = re.compile(r'[a-f0-9]{32}')
_HASH_UNIT = re.compile(r'^[a-f0-9\-]{32,}$')
_UNIT_SEPARATOR = re.compile(r'[_]+')


def is_image_file(filename):
    """Return True when ``filename`` has one of ``IMAGE_EXTENSIONS``."""
    _, ext = os.path.splitext(filename)
    return ext.lower() in IMAGE_EXTENSIONS


def get_first_txt_title(path):
    """Return the stem of the alphabetically first ``.txt`` file in ``path``.

    The extension match is case-insensitive and the listing is sorted, so the
    result does not depend on the order the file system returns entries in.

    Returns:
        The file name without extension, or ``None`` when the directory has
        no txt file or cannot be listed.
    """
    try:
        names = sorted(os.listdir(path))
    except OSError as e:
        print(f"在'{path}'检查txt文件时发生错误:{e}")
        return None
    for name in names:
        stem, ext = os.path.splitext(name)
        if ext.lower() == ".txt":
            return stem
    return None


def generate_unique_filename(path, filename, ext):
    """Return a path in ``path`` for ``filename + ext`` that does not exist.

    When the plain name is taken, ``(1)``, ``(2)``, ... is appended to the
    stem until a free name is found.
    """
    new_file_path = os.path.join(path, filename + ext)
    counter = 1
    while os.path.exists(new_file_path):
        new_file_path = os.path.join(path, f"{filename}({counter}){ext}")
        counter += 1
    return new_file_path


def clean_image_stem(filename, prefix=None):
    """Strip the noise units from an underscore-separated image stem.

    Args:
        filename: File name without extension.
        prefix: Prefix a previous run may already have put in front. When the
            stem starts with ``prefix + "_"`` exactly that prefix is removed;
            otherwise the first underscore-separated unit is dropped.

    Returns:
        The remaining units joined with ``_``; an empty string when nothing
        descriptive is left.
    """
    if prefix and filename.startswith(f"{prefix}_"):
        # Already prefixed: cut the whole prefix, which may itself contain
        # underscores, instead of only its first unit.
        units = _UNIT_SEPARATOR.split(filename[len(prefix) + 1:])
    else:
        units = _UNIT_SEPARATOR.split(filename)[1:]
    units = [
        unit for unit in units
        if unit
        and unit not in UNWANTED_UNITS
        and not any(char.isdigit() for char in unit)
    ]
    # Trailing hash-like units first, then trailing units of <= 4 characters.
    while units and _HASH_UNIT.search(units[-1]):
        units.pop()
    while units and len(units[-1]) <= 4:
        units.pop()
    return '_'.join(units)


def rename_image_files(path, prefix):
    """Rename every image directly inside ``path`` to ``<prefix>_<cleaned>``.

    Names containing a 32-character hex run, names without an underscore and
    names with nothing left after cleaning become ``<prefix>_(<n>)``. Files
    that already carry their final name are left untouched, so the function
    can be run repeatedly. A failure on one file is reported and the
    remaining files are still processed.

    Args:
        path: Directory whose image files are renamed.
        prefix: Text placed in front of every new name; nothing happens when
            it is empty.
    """
    if not prefix:
        print(f"在'{path}'中未找到txt文件或txt文件为空,跳过重命名操作。")
        return

    try:
        files = sorted(
            f for f in os.listdir(path)
            if is_image_file(f) and os.path.isfile(os.path.join(path, f))
        )
    except OSError as e:
        print(f"在'{path}'中重命名过程中发生错误:{e}")
        return

    already_numbered = re.compile(re.escape(prefix) + r'_\(\d+\)')
    counter = 1
    failed = False
    for file in files:
        filename, ext = os.path.splitext(file)
        if already_numbered.fullmatch(filename):
            continue  # numbered by an earlier run

        is_junk = _HASH_IN_NAME.search(filename) or '_' not in filename
        renamed = "" if is_junk else clean_image_stem(filename, prefix)
        if not renamed:
            # Nothing descriptive left: number the file instead of
            # producing a bare "<prefix>_".
            renamed = f"({counter})"
            counter += 1
        renamed = f"{prefix}_{renamed}"

        if renamed == filename:
            # The uniqueness check would collide with the file itself and
            # append a spurious "(1)".
            continue

        new_file_path = generate_unique_filename(path, renamed, ext)
        try:
            os.rename(os.path.join(path, file), new_file_path)
        except OSError as e:
            failed = True
            print(f"在'{path}'中重命名过程中发生错误:{e}")

    if not failed:
        print(f"在'{path}'中重命名完成。")


def rename_images_in_subfolders(root_path):
    """Rename the images of every directory under ``root_path`` that has a txt.

    The stem of the directory's first txt file is used as the name prefix;
    directories without a txt file are skipped.
    """
    for subdir, _dirs, _files in os.walk(root_path):
        prefix = get_first_txt_title(subdir)
        if prefix:
            rename_image_files(subdir, prefix)


def main():
    """Ask for the root folder and rename the images below it."""
    root_path = input("请输入总文件夹地址: ")
    rename_images_in_subfolders(root_path)


if __name__ == "__main__":
    main()
6.下划线处理:第一个转逗号加空格,后面的转空格
import os
import shutil

# Same set of image types the earlier workflow steps handle.
IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')


def copy_directory(src, dst):
    """Copy the directory tree ``src`` to ``dst``.

    ``dst`` may be located inside ``src`` (the backup folder is); it is then
    excluded from the copy so the backup can never recurse into itself. When
    ``dst`` already exists a message is printed and nothing is copied.
    """
    dst_abs = os.path.abspath(dst)

    def skip_destination(directory, names):
        # Ignore only the entry that is the destination directory itself.
        return [
            name for name in names
            if os.path.abspath(os.path.join(directory, name)) == dst_abs
        ]

    try:
        shutil.copytree(src, dst, ignore=skip_destination)
    except FileExistsError:
        print(f"备份目录 '{dst}' 已存在。")


def convert_underscores(file_name):
    """Turn the first ``_`` into ``", "`` and every later ``_`` into a space.

    A name without an underscore is returned unchanged.
    """
    head, separator, tail = file_name.partition('_')
    if not separator:
        return file_name
    return head + ', ' + tail.replace('_', ' ')


def rename_image_files(directory):
    """Apply ``convert_underscores`` to every image file below ``directory``.

    When the new name is already taken, `` (1)``, `` (2)``, ... is appended
    to the stem of the new name until a free name is found.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(IMAGE_SUFFIXES):
                continue
            new_file_name = convert_underscores(file)
            if new_file_name == file:
                continue

            original_file_path = os.path.join(root, file)
            new_file_path = os.path.join(root, new_file_name)
            # Split once, outside the loop, so the counter replaces the
            # previous suffix instead of piling up as "name (1) (2)".
            stem, extension = os.path.splitext(new_file_name)
            increment = 1
            while os.path.exists(new_file_path):
                new_file_path = os.path.join(
                    root, f"{stem} ({increment}){extension}"
                )
                increment += 1

            os.rename(original_file_path, new_file_path)
            print(f"文件 {original_file_path} 已重命名为 {new_file_path}")


def delete_specified_files(directory):
    """Delete every ``.txt`` and ``.xlsx`` file below ``directory``."""
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.txt', '.xlsx')):
                os.remove(os.path.join(root, file))
                print(f"文件 {file} 已删除。")


def main():
    """Back up the chosen directory, then rename and clean up the backup."""
    input_directory = input("请输入要处理的目录路径: ")
    # isdir also rejects a path that exists but is a regular file.
    if not os.path.isdir(input_directory):
        print(f"指定的目录 {input_directory} 不存在。")
        return

    backup_directory = os.path.join(input_directory, "_backup")
    print(f"正在创建备份目录: {backup_directory}")
    copy_directory(input_directory, backup_directory)

    print("正在重命名备份目录中的图片文件...")
    rename_image_files(backup_directory)

    print("正在删除备份目录中的txt和excel文件...")
    delete_specified_files(backup_directory)

    print("操作完成。")


if __name__ == "__main__":
    main()
标签:流程,MJ,parts,file,path,directory,精品,txt,os From: https://www.cnblogs.com/zly324/p/17915061.html