1.下载html2text模块(pip install html2text)
2.代码
# Batch-convert every .html file in input_folder to a Markdown (.md) file
# with the same base name in output_folder, using the html2text package.
import os

import html2text

# Folder containing the .html files to convert.
input_folder = "C:\\Users\\jude\\Desktop\\res\\cnblogs_blog_judes.20240831122513\\judes"
# Folder that receives the generated .md files.
output_folder = "C:\\Users\\jude\\Desktop\\res\\cnblogs_blog_judes.20240831122513\\new"

# Create the output folder up front; otherwise the first open(..., "w")
# below fails with FileNotFoundError when the folder does not exist yet.
os.makedirs(output_folder, exist_ok=True)

# Create the html2text converter instance.
converter = html2text.HTML2Text()
converter.body_width = 0  # 0 = do not wrap lines

# Walk every entry in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".html"):
        continue

    # Swap only the trailing extension. str.replace would rewrite every
    # ".html" occurrence inside the name (e.g. "a.html.bak.html").
    stem, _ext = os.path.splitext(filename)
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, stem + ".md")

    # Read the HTML file content.
    with open(input_path, "r", encoding="utf-8") as f:
        html_content = f.read()

    # Convert the HTML to Markdown.
    markdown_content = converter.handle(html_content)

    # Write the Markdown content to the output file.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(markdown_content)

    print(f"Converted {input_path} to {output_path}")
标签:MD,output,Python,filename,HTML,path,input,folder From: https://www.cnblogs.com/judes/p/18390797