from bs4 import BeautifulSoup
def exact_p_tag(path, f):
    """Write the text of every ``<p>`` element in an XHTML file to *f*.

    The file at *path* is read as UTF-8 and parsed with BeautifulSoup's
    ``lxml`` parser. The text of each ``<p>`` element is written to *f*
    in document order, one element per line.

    Args:
        path: Location of the XHTML file; anything ``open()`` accepts.
        f: An already-open, writable text stream. It is only written to,
            never closed, so the caller keeps ownership of it.
    """
    # The context manager closes the input file even if reading or
    # parsing raises, which the manual open()/close() pair did not.
    with open(path, 'r', encoding='utf-8') as xhtml_file:
        soup = BeautifulSoup(xhtml_file.read(), 'lxml')

    for p in soup.find_all('p'):
        f.write(p.text + '\n')
import os
from pathlib import Path

# Make the relative 'EPUB/xhtml' pattern below resolve against this directory.
os.chdir('C:/Users/tellw/Downloads/test')

# Path.glob makes no ordering guarantee; sort so the documents are always
# written to the output in the same (file-name) order.
xhtml_file_paths = sorted(Path('EPUB/xhtml').glob('*.xhtml'))

# The with-block guarantees the output file is flushed and closed even if
# processing one of the documents raises.
with open('C:/Users/tellw/test/test.txt', 'w', encoding='utf8') as f:
    for xfp in xhtml_file_paths:
        exact_p_tag(xfp, f)
参考:《使用 Python 提取 epub 中的文本》 https://fanlumaster.github.io/2021/07/08/使用-Python-提取-epub-中的文本/
创建于2409071243,修改于2409071243
标签:title, file, import, xhtml, txt, epub。来源:https://www.cnblogs.com/tellw/p/18401556