"""convert code to markdown
"""
import asyncio
import datetime
import os
import platform
import re
import shutil
import xmlrpc.client
class ST:
    """Namespace for the blog connection settings.

    Other code in this file reads ``ST.blog_url``, ``ST.blog_id``,
    ``ST.username`` and ``ST.password``, but none of them is defined in the
    visible class body.  The four placeholder comments below have the exact
    form this script writes when it redacts a line carrying the ignore
    marker, so presumably they stood for those four settings -- confirm
    against the unredacted script before relying on this.
    """

    # <xxxxxxxxx>
    # <xxxxxxxxx>
    # <xxxxxxxxx>
    # <xxxxxxxxx>
class MarkdownToCnblog:
    """Publish a markdown file, together with its local images, to a blog.

    The blog is reached through the MetaWeblog XML-RPC API
    (``metaWeblog.newMediaObject`` for images, ``metaWeblog.newPost`` for the
    post itself).  Connection settings are read from the ``ST`` class that is
    defined elsewhere in this file.

    Typical use::

        MarkdownToCnblog(os.path.abspath("note.md")).upload_markdown()
    """

    __slots__ = [
        "path",
        "base_dir",
        "raw_md",
        "images",
        "net_images",
        "new_md",
        "mime_mapping",
        "server",
        "blog_title",
        "blog_tags",
        "img_format",
        "gen_network_file",
        "publish",
    ]

    def __init__(self, path: str) -> None:
        """Remember the markdown *path* and try to open the XML-RPC proxy.

        A failure to build the proxy is reported on stdout and leaves
        ``self.server`` as ``None``; ``upload_markdown`` checks for that.
        """
        self.path = path
        self.base_dir = os.path.dirname(path)
        self.raw_md = ""
        self.new_md = ""
        self.images = []
        self.net_images = []
        self.mime_mapping = {
            ".ico": "image/x-icon",
            ".jpg": "image/jpeg",
            ".jpe": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".png": "image/png",
            ".gif": "image/gif",
        }
        # Defaults used when the markdown carries no <!-- title --> /
        # <!-- tags --> comments, so the upload never hits an unset slot.
        self.blog_title = os.path.splitext(os.path.basename(path))[0]
        self.blog_tags = ""
        # "typora" converts Typora zoom styles; any other non-empty value is
        # used as a ``str.format`` template receiving the image URL.
        self.img_format = "typora"
        self.publish = True
        self.gen_network_file = False
        self.server = None
        try:
            self.server = xmlrpc.client.ServerProxy(ST.blog_url)
        except Exception as ex:  # boundary: report and continue without a server
            if "unsupported XML-RPC protocol" in str(ex):
                print("请查看config.yaml文件中的blog_url,应该是这个URL地址没设置对")
            else:
                print(f"无法创建博客连接:{ex}")

    def upload_markdown(self):
        """Read the file, upload its local images, then publish the post."""
        if self.server is None:
            print("博客连接未建立,无法上传")
            return
        self.__read_markdown()
        self.__find_md_img()
        self.__upload_img()
        self.__get_title_and_tag()
        self.__upload_blog()

    def __get_title_and_tag(self):
        """Take the title and tags from the first two HTML comments, if any."""
        comments = re.findall(r"<!--(.*?)-->", self.raw_md)
        if comments:
            self.blog_title = comments[0].strip()
        if len(comments) > 1:
            self.blog_tags = comments[1].strip()

    def __upload_blog(self):
        """Send the (image-rewritten) markdown as a new post."""
        post = dict(
            description=self.new_md,
            title=self.blog_title,
            categories=["[Markdown]"],
            mt_keywords=self.blog_tags,
        )
        try:
            self.server.metaWeblog.newPost(ST.blog_id, ST.username, ST.password, post, self.publish)
            print("===========上传成功===============")
        except Exception as ex:
            # Any failure lands here, not only duplicates, so say so.
            print(f"上传失败(可能相同博文已存在):{ex}")

    def __upload_img(self):
        """Upload every existing local image and rewrite the links.

        Each uploaded URL is paired with the exact reference it came from, so
        references whose file is missing can no longer shift the pairing.
        """
        pending = []  # (reference as written in the markdown, full path)
        for ref in self.images:
            # A leading "/" is treated as relative to the markdown's folder.
            relative = "." + ref if ref.startswith("/") else ref
            image_full_path = os.path.join(self.base_dir, relative)
            if not os.path.exists(image_full_path):
                print(f"图片{image_full_path}不存在")
                continue
            pending.append((ref, image_full_path))
        if not pending:
            return
        # asyncio.run creates and disposes of its own loop, so this method can
        # be called more than once per process.
        results = asyncio.run(self.__upload_all(pending))
        image_mapping = {}
        for (ref, _), url in zip(pending, results):
            if isinstance(url, BaseException) or not url:
                continue  # failed upload: keep the local reference
            image_mapping[ref] = url
        self.new_md = self.replace_md_img(image_mapping)

    async def __upload_all(self, pending):
        """Run all uploads and return their results in *pending* order."""
        tasks = []
        for idx, (_, full_path) in enumerate(pending):
            task = asyncio.ensure_future(self.__upload_md_img(full_path))
            task.add_done_callback(
                lambda future, idx=idx, img_path=full_path: self.get_image_url(future, idx, img_path)
            )
            tasks.append(task)
        # gather preserves input order; exceptions are returned, not raised.
        return await asyncio.gather(*tasks, return_exceptions=True)

    def get_image_url(self, t: asyncio.Task, idx, img_path):
        """Done-callback: log the outcome and record the uploaded URL."""
        if t.cancelled():
            print(f"{idx}->\t{img_path}\t上传失败:cancelled")
            return
        error = t.exception()
        if error is not None:
            print(f"{idx}->\t{img_path}\t上传失败:{error}")
            return
        img_url = t.result()
        print(f"{idx}->\t{img_path}\t上传成功,URL:{img_url}")
        self.net_images.append(img_url)

    async def __upload_md_img(self, path):
        """Upload one image file and return the URL reported by the server."""
        # NOTE(review): the 2 s pause is kept from the original; its purpose
        # is not visible here (possibly throttling) -- confirm before removing.
        await asyncio.sleep(2)
        name = os.path.basename(path)
        suffix = os.path.splitext(name)[1].lower()
        mime = self.mime_mapping.get(suffix, "application/octet-stream")
        with open(path, "rb") as f:
            file = {"bits": f.read(), "name": name, "type": mime}
        url = self.server.metaWeblog.newMediaObject(ST.blog_id, ST.username, ST.password, file)
        return url.get("url")

    def __find_md_img(self):
        """Collect local image references; http(s)/ftp URLs are left alone."""
        found = re.findall(r"!\[.*?\]\((.*?)\)", self.raw_md)
        found += re.findall(r'<img src="(.*?)"', self.raw_md)
        local = [i for i in found if not re.match(r"((http(s?))|(ftp))://.*", i)]
        # Drop repeats (keeping first-seen order) so no image is uploaded twice.
        self.images = list(dict.fromkeys(local))
        print(f"共找到{len(self.images)}张本地图片{self.images}")

    def __read_markdown(self):
        """Load the markdown text; ``new_md`` starts as an unmodified copy."""
        with open(self.path, "r", encoding="utf-8") as f:
            self.raw_md = f.read()
        self.new_md = self.raw_md

    def replace_md_img(self, img_mapping):
        """Return the markdown with local image links replaced by URLs.

        Args:
            img_mapping: dict mapping a reference exactly as written in the
                markdown to the URL that should replace it.

        Returns:
            The rewritten markdown text.  When ``gen_network_file`` is true
            the text is also written next to the source as
            ``<name>_network<ext>``.
        """
        with open(self.path, "r", encoding="utf-8") as fr:
            md = fr.read()
        # Longest reference first, so a short name that is a substring of a
        # longer one cannot corrupt it.
        for local in sorted(img_mapping, key=len, reverse=True):
            md = md.replace(local, img_mapping[local])
        if self.img_format:
            md_links = re.findall(r"!\[.*?\]\(.*?\)", md)
            md_links += re.findall(r"<img src=.*?/>", md)
            for ml in md_links:
                urls = re.findall(r"!\[.*?\]\((.*?)\)", ml)
                urls += re.findall(r'<img src="(.*?)"', ml)
                if not urls:
                    continue  # e.g. single-quoted src: nothing to rewrite
                img_url = urls[0]
                if self.img_format == "typora":
                    # Typora stores the size as a zoom style; emit a width.
                    zoom = re.findall(r'style="zoom:(.*?)%;"', ml)
                    if zoom:
                        md = md.replace(ml, f'<center><img src="{img_url}" style="width:{zoom[0]}%;" /></center>')
                else:
                    md = md.replace(ml, self.img_format.format(img_url))
        if self.gen_network_file:
            stem, ext = os.path.splitext(os.path.basename(self.path))
            path_net = os.path.join(os.path.dirname(self.path), f"{stem}_network{ext}")
            with open(path_net, "w", encoding="utf-8") as fw:
                fw.write(md)
            print(f"图片链接替换完成,生成新markdown:{path_net}")
        return md
class CodeToMarkDown:
    """Dump the source files of a project into one markdown note.

    Every collected file becomes a level-one heading holding its
    project-relative path, followed by a fenced code block with the file's
    content.  The note is written into the project root as
    ``Z_<timestamp>_NOTE.md``.
    """

    __slots__ = ["path", "md_path", "code_file_path", "exclude_dirs", "exclude_files", "md_suffix_table", "include_exts", "key_work_filter_list"]

    def __init__(self, path: str = None) -> None:
        """Use *path* as the project root, or the current directory if empty."""
        self.path = path if path else os.getcwd()
        # Directory names that are never descended into (exact-name match).
        self.exclude_dirs = [
            "__pycache__",
            "venv",
            "build",
            "dist",
            "node_modules",
            "public",
            "LICENSE",
            "assets",
            "vendor",
            "tmp",
            "static",
            "templates",
            "bin",
            "obj",
            "Migrations",
            "Properties",
            "packages",
        ]
        # File-name suffixes that are skipped even if the extension matches.
        self.exclude_files = [
            "_NOTE.md",
            ".d.ts",
            ".lock",
            ".png",
            ".woff2",
            ".ttf",
            ".woff",
            ".css",
            "README.md",
            ".toml",
            "swagger-ui-bundle.js",
            "-lock.json",
            "zz_code2md.py",
            "zz.py",
            "temp.md",
        ]
        # File suffix -> language label used on the markdown code fence.
        self.md_suffix_table = {"command": "sh", "csproj": "xml"}
        # File-name suffixes that are collected.
        self.include_exts = [
            ".py",
            ".vue",
            ".js",
            ".ts",
            ".html",
            ".go",
            ".mod",
            ".json",
            ".txt",
            ".sh",
            ".command",
            ".cs",
            ".csproj",
            ".jsx",
            ".sln",
            ".bat",
        ]
        # Not read anywhere in this class; kept because it is a declared slot.
        self.key_work_filter_list = [""]

    def generate_md(self):
        """Choose the output path, collect the files and write the note."""
        self.__generate_md_file_path()
        self.__collect_code_files()
        self.__generate_md_file()

    def __generate_md_file_path(self):
        """Set ``md_path`` to a timestamped note name inside the project root."""
        cur_time_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.md_path = os.path.join(self.path, f"Z_{cur_time_str}_NOTE.md")

    def __collect_code_files(self):
        """Fill ``code_file_path`` with every file that passes the filters."""
        excluded_dirs = set(self.exclude_dirs)
        excluded_suffixes = tuple(self.exclude_files)
        included_suffixes = tuple(self.include_exts)
        self.code_file_path = []
        for root, dirs, files in os.walk(self.path):
            # Prune in place so os.walk does not descend into hidden or
            # excluded directories.  Names are compared exactly: a substring
            # test would also drop e.g. "cabinet" because it contains "bin".
            dirs[:] = [d for d in dirs if not d.startswith(".") and d not in excluded_dirs]
            for file in files:
                if file.startswith(".") or file.endswith(excluded_suffixes):
                    continue
                if file.endswith(included_suffixes):
                    self.code_file_path.append(os.path.join(root, file))

    def __generate_md_file(self):
        """Append every collected file to the note, logging progress."""
        for i, code_file_path in enumerate(self.code_file_path):
            print(i + 1, "->", self.__get_md_title_level_one(code_file_path))
            self.__readcode_writemd(code_file_path)

    def __get_md_title_level_one(self, code_file_path):
        """Return ``<project dir name>/<path relative to the project root>``.

        Args:
            code_file_path: path of a file located under ``self.path``.

        Returns:
            The heading text for that file.
        """
        # relpath/abspath cope with a trailing separator on self.path, which
        # character-based prefix slicing does not.
        relative = os.path.relpath(code_file_path, self.path)
        project_name = os.path.basename(os.path.abspath(self.path))
        return os.path.join(project_name, relative)

    def __readcode_writemd(self, code_file_path):
        """Read one source file as UTF-8 and append it to the note.

        A file that is not valid UTF-8 is reported and written with empty
        content.
        """
        with open(code_file_path, "r", encoding="utf-8") as f:
            try:
                content = f.read()
            except UnicodeDecodeError as e:
                print(f"{code_file_path}{e}文件编码读取错误")
                content = ""
        self.__write2md(content, code_file_path)

    def __write2md(
        self,
        content,
        code_file_path,
    ):
        """Append one heading plus fenced code block to the note.

        Args:
            content: text of the source file.
            code_file_path: path of the source file (gives title and label).
        """
        md_title_level_one = self.__get_md_title_level_one(code_file_path)
        code_label = self.__get_code_md_lable_by_suffix(code_file_path)
        section = "".join(
            [
                "\n",
                f"# `{md_title_level_one}`\n\n",
                f"```{code_label}\n",
                content,
                "\n",
                "```\n\n\n",
            ]
        )
        with open(self.md_path, "a", encoding="utf-8") as f:
            f.write(section)

    def __get_code_md_lable_by_suffix(self, code_file_path):
        """Return the code-fence label for a file, or ``""`` without suffix."""
        match = re.search(r'\.([^.\\/:*?"<>|\r\n]+)$', code_file_path)
        if match is None:
            return ""
        suffix = match.group(1)
        return self.md_suffix_table.get(suffix, suffix)
class MarkdownToCode:
    """Recreate source files from a note made of heading + code-fence pairs.

    The expected layout is the one ``CodeToMarkDown`` in this file writes: a
    level-one heading holding the file path in backticks, followed by a
    fenced code block with the file content.  Files are created relative to
    the directory that contains the markdown file.
    """

    __slots__ = ["path", "base_dir"]

    def __init__(self, path: str = None) -> None:
        """Use *path*, or the newest ``*_NOTE.md`` under the current directory.

        Raises:
            FileNotFoundError: no *path* was given and no note was found.
        """
        self.path = path if path else self.__get_latest_md_file_path()

    def __get_latest_md_file_path(self):
        """Return the most recently modified ``*_NOTE.md`` below the cwd."""
        candidates = [
            os.path.join(root, file)
            for root, _, files in os.walk(os.getcwd())
            for file in files
            if file.endswith("_NOTE.md")
        ]
        if not candidates:
            raise FileNotFoundError("no *_NOTE.md file found under " + os.getcwd())
        # Modification time first; the path only breaks ties deterministically.
        return max(candidates, key=lambda p: (os.path.getmtime(p), p))

    def generate_code(self):
        """Extract every file described in the markdown note."""
        self.__set_base_dir()
        self.__read_md_file()

    def __read_md_file(self):
        """Parse the note and write out each (path, code) pair found."""
        with open(self.path, "r", encoding="utf-8") as f:
            md_text = f.read()
        # Heading with the path in backticks, blank line(s), opening fence
        # with an optional label, then the body up to the first line that is
        # exactly a closing fence.  ``*?`` lets an empty file match instead of
        # swallowing the next section, and the single "\n" before the closing
        # fence keeps the file's own trailing newline intact.
        # Limitation: a body that itself contains a bare fence line ends early.
        pattern = r"^# `(.+)`\n+```[^\n`]*\n([\s\S]*?)\n```\n"
        matches = re.findall(pattern, md_text, re.MULTILINE)
        for i, (file_path, code) in enumerate(matches):
            print(f"{i}->", file_path)
            self.__create_from_file_path(file_path, code)

    def __set_base_dir(self):
        """Files are created relative to the markdown file's directory."""
        self.base_dir = os.path.dirname(self.path)

    def __create_from_file_path(self, file_path, content):
        """Write *content* to *file_path* below the base directory.

        Args:
            file_path: relative path taken from a markdown heading.
            content: text to store in the file.

        Headings that would resolve outside the base directory (absolute
        paths or ``..`` segments) are reported and skipped.
        """
        base = os.path.abspath(self.base_dir)
        full_file_path = os.path.abspath(os.path.join(base, file_path))
        # join(base, "") appends exactly one separator, also for a root dir.
        if not full_file_path.startswith(os.path.join(base, "")):
            print(f"跳过不安全的路径:{file_path}")
            return
        os.makedirs(os.path.dirname(full_file_path), exist_ok=True)
        with open(full_file_path, "w", encoding="utf-8") as f:
            f.write(content)
class CollectMarkdownNote:
    """Embed markdown notes into a copy of this script, and extract them again.

    ``collect_markdown`` writes a timestamped copy of the running script and
    appends every collected note to it as a triple-quoted string introduced
    by a ``#==<flag>==<file name>`` marker.  ``un_collect_markdown`` reads
    the running script and writes each such section back to
    ``zz_note/<flag>/``.
    """

    __slots__ = [
        "path",
        "md_path",
        "cur_file_name",
        "cur_file_new_name",
        "md_files",
        "dotnet_md_files",
        "python_md_files",
        "dotnet_line_number_start",
        "dotnet_split_flag",
        "dotnet_file_names",
        "python_split_flag",
        "current_new_md_file_name",
        "save_md_file_heads",
        "split_flag",
        "other_split_flag",
        "other_md_files",
    ]

    def __init__(self, path: str = None) -> None:
        """Search for notes below *path*, or the current directory if empty."""
        self.path = path if path else os.getcwd()
        self.dotnet_line_number_start = []
        self.dotnet_file_names = []
        # Output file of the section currently being extracted, if any.
        self.current_new_md_file_name = None
        # File-name prefixes a markdown file must have to be collected.
        self.save_md_file_heads = ["zz_"]
        # All collected notes, plus the same notes split by keyword in path.
        self.md_files = []
        self.dotnet_md_files = []
        self.python_md_files = []
        self.other_md_files = []
        # Name of the script copy to generate (set during collection).
        self.cur_file_new_name = None
        # File name of this script; it is opened relative to the cwd.
        self.cur_file_name = os.path.basename(__file__)

    def collect_markdown(self):
        """Collect the project's notes into a new copy of this script."""
        self.__get_new_python_file_name()
        self.__get_cur_markdown_notes()
        self.__read_cur_markdown_notes()
        self.__generate_new_python_file()

    def __write2md(self, line):
        """Append *line* to the open section file, skipping quote-only lines."""
        if self.current_new_md_file_name is None:
            return
        # Lines holding only the triple quote delimit sections; not content.
        if line in ('"""\n', '"""', '\n"""'):
            return
        with open(self.current_new_md_file_name, "a+", encoding="utf-8") as nf:
            nf.write(line)

    def un_collect_markdown(self):
        """Extract every embedded note from this script into ``zz_note/``."""
        start_marker = re.compile(r"^\"\"\"#==(.{1,10})==")
        with open(self.cur_file_name, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                # Write first: the marker line itself is not part of the note
                # because no output file is selected yet when it is seen.
                self.__write2md(line)
                if start_marker.match(line):
                    self.current_new_md_file_name = self.__get_output_md_file_name_by_line(line)
                    print(i + 1, "->", self.current_new_md_file_name, "start")
                if line.startswith('"""\n') and self.current_new_md_file_name is not None:
                    print(i + 1, "->", self.current_new_md_file_name, "end")
                    self.current_new_md_file_name = None

    def __get_output_md_file_name_by_line(self, line):
        """Build the output path for the section introduced by *line*.

        The folder and base name come from the marker; both fall back to
        ``"other"`` when the marker does not end in a ``.md`` file name.  The
        folder is created, and a timestamp is appended to avoid collisions.
        """
        dir_name = "other"
        name = "other"
        res = re.match(r"^\"\"\"#==(.{1,10})==(.+)\.md", line)
        if res:
            dir_name, name = res.groups()
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        dir_path = os.path.join("zz_note", dir_name)
        os.makedirs(dir_path, exist_ok=True)
        return os.path.join(dir_path, f"{name}_{cur_time_str}.md")

    def __read_cur_markdown_notes(self):
        """Sort the collected notes into dotnet / python / other lists."""
        for md_file in self.md_files:
            if "dotnet" in md_file:
                self.dotnet_md_files.append(md_file)
            elif "python" in md_file:
                self.python_md_files.append(md_file)
            elif "other" in md_file:
                self.other_md_files.append(md_file)

    def __get_cur_markdown_notes(self):
        """Find ``zz_<flag>_*.md`` files that are not inside ``zz_note``."""
        print("开始收集,所以以zz_开头的,不在zz_note文件夹中的markdown文件")
        pattern = re.compile(r"zz_(\w+)_")
        heads = tuple(self.save_md_file_heads)
        for root, dirs, files in os.walk(self.path):
            # Prune in place so the extraction output folder is never walked.
            dirs[:] = [d for d in dirs if "zz_note" not in d]
            candidates = [f for f in files if not f.startswith(".") and f.endswith(".md")]
            for i, file in enumerate(candidates):
                if file.startswith(heads) and pattern.search(file):
                    print(i + 1, "->", file)
                    self.md_files.append(os.path.join(root, file))

    def __get_new_python_file_name(self):
        """Name the copy ``<part before the first underscore>_<timestamp>.py``."""
        cur_time_str = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        # Without an underscore split() yields the whole name, so a single
        # expression covers both cases.
        head = self.cur_file_name.split("_")[0]
        self.cur_file_new_name = f"{os.path.splitext(head)[0]}_{cur_time_str}.py"

    def __get_cur_file_name(self, md_file_path):
        """Return the file-name part of *md_file_path*."""
        return os.path.basename(md_file_path)

    def __generate_new_python_file(self):
        """Write the script copy, append the notes, then back the copy up.

        Source lines carrying the ignore marker (a hash comment reading
        @ignore) are replaced by a placeholder comment in the copy.
        """
        ignore_marker = re.compile(r"# {0,1}@ignore")
        flag_pattern = re.compile(r"zz_(\w+)_")
        # One handle each, closed on exit; the output is opened once rather
        # than once per source line.
        with open(self.cur_file_name, "r", encoding="utf-8") as src, open(
            self.cur_file_new_name, "a+", encoding="utf-8"
        ) as dst:
            for line in src:
                if ignore_marker.search(line):
                    line = "# <xxxxxxxxx>\n"
                dst.write(line)
            for md_file in self.md_files:
                found = flag_pattern.findall(md_file)
                split_flag = found[0] if found else "other"
                md_title = self.__get_cur_file_name(md_file)
                content = self.__read_md_file(md_file)
                dst.write(f'\n\n"""#=={split_flag}=={md_title}\n\n')
                dst.write(self.__filter_python_comment(content))
                dst.write('\n"""\n\n')
        self.__copy_current_python_file_to_dst_dir(self.cur_file_new_name)

    def __copy_current_python_file_to_dst_dir(self, current_python_file_name):
        """Copy the generated script into the per-OS backup folder.

        Args:
            current_python_file_name: name of the file to copy.

        Only Windows and macOS have a configured folder; on any other system
        a message is printed and nothing is copied.
        """
        backup_dirs = {
            "Windows": r"D:/zz",
            "Darwin": r"/Users/song/Code/zz_note",
        }
        dir_path = backup_dirs.get(platform.system())
        if dir_path is None:
            print("Unknown system")
            return
        os.makedirs(dir_path, exist_ok=True)
        shutil.copy(current_python_file_name, os.path.join(dir_path, current_python_file_name))

    def __filter_python_comment(self, content: str):
        """Remove triple quotes so a note cannot end its enclosing string."""
        return content.replace('"""', "")

    def __read_md_file(self, file):
        """Return the UTF-8 text of *file*."""
        with open(file, "r", encoding="utf-8") as f:
            return f.read()
def backup_current_project():
    """Copy this script's directory to a timestamped sibling directory.

    The copy is created next to the project directory and is named
    ``<project dir name>_<YYYYmmdd_HHMMSS>``.

    Raises:
        FileExistsError: the destination already exists (``shutil.copytree``).
    """
    # abspath guards against a relative __file__, for which dirname() would
    # be "" and both the source directory and the archive name would be wrong.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    cur_time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    compress_filename = f"{os.path.basename(src_dir)}_{cur_time_str}"
    dst_dir = os.path.join(os.path.dirname(src_dir), compress_filename)
    shutil.copytree(src_dir, dst_dir)
def main():
    """Show the menu, read one option number and run the matching tool."""
    print("===============================start===============================")
    print("1. Project Convert to Markdown  2. Markdown Convert to Project")
    print("3. Collect Markdown Notes       4. Uncollect Markdown Notes")
    print("5. Backup the project           6. Upload a markdown file  ")
    try:
        option_number = int(input("Please input a number: "))
    except (ValueError, EOFError):
        # Anything that is not an integer falls through to "unknown option".
        option_number = 0
    print("您输入的整数是:", option_number)
    if option_number == 1:
        CodeToMarkDown().generate_md()
    elif option_number == 2:
        md_path = input("请输入需要转换的markdown文件路径(默认使用当前路径最新的markdown文件) : ").strip()
        # An empty answer makes MarkdownToCode pick the newest note itself.
        MarkdownToCode(md_path).generate_code()
    elif option_number == 3:
        CollectMarkdownNote().collect_markdown()
    elif option_number == 4:
        is_del = input("是否要删除现有的文件夹(Y/N):").strip()
        # Only an explicit N/n keeps the existing output folder.
        if is_del.lower() != "n":
            try:
                shutil.rmtree("zz_note")
                print("zz_note文件夹,删除成功")
            except FileNotFoundError:
                print("no such directory,zz_note")
        CollectMarkdownNote().un_collect_markdown()
    elif option_number == 5:
        backup_current_project()
    elif option_number == 6:
        md_path = input("请输入需要上传的markdown文件路径: ").strip()
        if md_path:
            MarkdownToCnblog(os.path.abspath(md_path)).upload_markdown()
        else:
            # abspath("") is the current directory, which cannot be uploaded.
            print("未输入markdown文件路径")
    else:
        print("unknown option")
    print("===============================done===============================")


if __name__ == "__main__":
    main()
# Tags: __, md, name, self, code2md, md2code, file, path
# Source: https://www.cnblogs.com/zhuoss/p/17977784