一、只输出文本内容
# Script 1: copy every text paragraph of a PowerPoint deck into a new Word document.
from pptx import Presentation
from docx import Document

# Location of the source .pptx file.
filepath = r"C:\Users\18703\Desktop\智家经分\智家经分v2.pptx"
# Destination of the generated .docx file.
save_path = r"C:\Users\18703\Desktop\智家经分\智家经分.docx"

pptx = Presentation(filepath)
wordfile = Document()

# Visit every shape on every slide.
for slide in pptx.slides:
    for shape in slide.shapes:
        # Shapes without a text frame carry no paragraphs to copy.
        if not shape.has_text_frame:
            continue
        for para in shape.text_frame.paragraphs:
            # One Word paragraph per PowerPoint paragraph, empty ones included.
            wordfile.add_paragraph(para.text)

wordfile.save(save_path)
二、输出文本、表格、图片内容
# Script 2: walk every shape of a PowerPoint deck and output its text,
# table and picture content. Text and table data are printed; pictures are
# written to image files in the current working directory.
import pandas as pd
from docx import Document
from pptx import Presentation
from pptx.shapes.picture import Picture

# Path of the PowerPoint file to read.
prs = Presentation(r"C:\Users\18703\Desktop\智家经分\智家经分v2.pptx")
# Running counter used to name the extracted image files (1, 2, 3, ...).
index = 1
# NOTE(review): this document is created but nothing below writes to it or
# saves it; kept so the module-level name stays available.
wordfile = Document()

# Visit every shape on every slide.
for slide in prs.slides:
    for shape in slide.shapes:
        if shape.has_text_frame:
            # Print each non-empty paragraph of the text frame.
            for paragraph in shape.text_frame.paragraphs:
                if paragraph.text:
                    print(paragraph.text)
        elif shape.has_table:
            # Collect the table as a 2-D list: one inner list of cell texts per row.
            one_table_data = [
                [cell.text for cell in row.cells] for row in shape.table.rows
            ]
            print(one_table_data)
        elif isinstance(shape, Picture):
            image = shape.image
            # Name the file after the image's real format instead of always
            # using ".jpg", so a PNG/GIF blob is not saved with a wrong extension.
            image_path = f"{index}.{image.ext}"
            # image.blob is the raw binary byte stream of the picture.
            with open(image_path, "wb") as f:
                f.write(image.blob)
            index += 1
            # Report the file that was written (not the closed file object).
            print(image_path)
参考:https://blog.csdn.net/rubyw/article/details/130829579
标签:pptx,word,python,text,frame,shape,paragraph,ppt,经分
From: https://www.cnblogs.com/pu369/p/17761834.html