提取超链接
from docx import Document
注意:docx 包不是 Python 自带的包,需要另行下载。下一篇我会给大家讲相关的下载方法,可以点这里跳转到下一篇博客。
from docx import RT
import re
# List every hyperlink relationship stored in the document.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\p".
d = Document(r'D:\无名字的文件夹\python练习\材料.docx')

# Hyperlink targets are recorded as relationships of the main document part,
# not on individual paragraphs, so a single pass over the relationships is
# enough; looping over paragraphs as well would only repeat the same output.
rels = d.part.rels
for rel in rels.values():
    if rel.reltype == RT.HYPERLINK:
        # NOTE(review): the first value printed is the relationship object
        # itself, not the link's display text; the display text would have to
        # be read from the paragraph content -- confirm what output is wanted.
        # target_ref is the public accessor for an external relationship's URL.
        print("\n超链接文本为", rel, "超链接网址为:", rel.target_ref)
提取文本
# Print the text of every paragraph in the document, one paragraph per line.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\p".
doc = Document(r'D:\无名字的文件夹\python练习\材料.docx')
for p in doc.paragraphs:
    print(p.text)
提取图片
from zipfile import ZipFile
from os.path import basename
# List the image files packed inside the .docx archive (a .docx file is a zip
# container). Only the member names are printed; nothing is extracted to disk.
# The raw string keeps the Windows backslashes literal, and the with-block
# guarantees the archive handle is closed even if an error occurs.
with ZipFile(r"D:\无名字的文件夹\python练习\材料.docx") as zf:
    for fn in zf.namelist():
        # Compare on the lower-cased name so that e.g. "IMAGE1.PNG" is matched
        # too, but print the name exactly as it is stored in the archive.
        if fn.lower().endswith(('.jpg', '.jpeg', '.png')):
            print(fn)