Pypandoc使用pandoc来进行各种文本格式的转换。
安装
# 不带pandoc执行库
pip install pypandoc
# 自带pandoc
pip install pypandoc_binary
使用
import pypandoc
# convert all markdown files in a chapters/ subdirectory.
pypandoc.convert_file('chapters/*.md', 'docx', outputfile="somefile.docx")
# convert all markdown files in the book1 and book2 directories.
pypandoc.convert_file(['book1/*.md', 'book2/*.md'], 'docx', outputfile="somefile.docx")
# convert the front from another drive, and all markdown files in the chapter directory.
pypandoc.convert_file(['D:/book_front.md', 'book2/*.md'], 'docx', outputfile="somefile.docx")
支持pathlib
import pypandoc
from pathlib import Path
# single file
input = Path('somefile.md')
output = input.with_suffix('.docx')
pypandoc.convert_file(input, 'docx', outputfile=output)
# convert all markdown files in a chapters/ subdirectory.
pypandoc.convert_file(Path('chapters').glob('*.md'), 'docx', outputfile="somefile.docx")
# convert all markdown files in the book1 and book2 directories.
pypandoc.convert_file([*Path('book1').glob('*.md'), *Path('book2').glob('*.md')], 'docx', outputfile="somefile.docx")
# pathlib globs must be unpacked if they are inside lists.
参考
https://pypi.org/project/pypandoc/
https://pandoc.org/help.html
https://www.strerr.com/cn/word2html.html
https://zhuanlan.zhihu.com/p/30891168
https://pypi.org/project/pandoc/