1. 安装
1.1 使用 Composer 安装 phpoffice/phpword
composer require phpoffice/phpword
1.2 后台代码
// Read the path of the uploaded file (relative to the public directory) from the request.
$file = $this->request->request('file');
if (!$file) {
    $this->error(__('Parameter %s can not be empty', 'file'));
}

// Canonicalise both the public directory and the requested file so that "../"
// segments and symlinks are resolved before the containment check below.
// realpath() also collapses duplicate separators, so no manual "//" clean-up is needed.
$publicDir = realpath(ROOT_PATH . DS . 'public');
$filePath = realpath(ROOT_PATH . DS . 'public' . DS . $file);

// Reject anything that does not exist, is not a regular file, or resolves to a
// location outside the public directory (path traversal through the "file" parameter).
// NOTE(review): this relies on $this->error() aborting the request — confirm for this framework version.
if (
    $publicDir === false
    || $filePath === false
    || strpos($filePath, $publicDir . DS) !== 0
    || !is_file($filePath)
) {
    $this->error(__('No results were found'));
}

// Only the whitelisted extensions are accepted; compare case-insensitively and strictly.
$ext = strtolower(pathinfo($filePath, PATHINFO_EXTENSION));
if (!in_array($ext, ['csv', 'xls', 'xlsx', 'docx'], true)) {
    $this->error(__('Unknown data format'));
}

// Parse the document into an array of paragraphs and tables.
// NOTE(review): csv/xls/xlsx pass the whitelist but are also handed to the DOCX parser here —
// confirm those formats are routed elsewhere in the full controller.
$list = $this->tianhao($filePath);
1.3 把文档内容解析成数组
/**
 * Parse a DOCX file into an array of its paragraph text and tables.
 *
 * Every TextRun found in a section contributes one string: the concatenated text
 * of its Text children, with each embedded Image written to $imageDir and replaced
 * in the string by its URL under $imageBaseUrl. Every Table contributes one array
 * holding the result of getTableNode() for each of its rows. Elements of any other
 * type are skipped.
 *
 * If PHPWord raises an exception, the script is terminated with an error message.
 *
 * @param string $cleanedFilePath Path of the DOCX file to load.
 * @param string $imageDir        Directory extracted images are written to (must end with a slash).
 * @param string $imageBaseUrl    URL prefix under which $imageDir is served (must end with a slash).
 *
 * @return array Strings (one per TextRun) and arrays (one per Table), in iteration order.
 */
public function tianhao(
    $cleanedFilePath,
    $imageDir = '/www/wwwroot/tk.79524795.vip/public/wordimg/',
    $imageBaseUrl = 'http://tk.79524795.vip/wordimg/'
) {
    try {
        // Load the document; the reader type is left to IOFactory's default.
        $phpWord = IOFactory::load($cleanedFilePath);

        $documentContent = [];

        foreach ($phpWord->getSections() as $section) {
            foreach ($section->getElements() as $element) {
                if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
                    // Flatten the run into a single string of text and image URLs.
                    $text = '';
                    foreach ($element->getElements() as $child) {
                        if ($child instanceof \PhpOffice\PhpWord\Element\Text) {
                            $text .= $child->getText();
                        } elseif ($child instanceof \PhpOffice\PhpWord\Element\Image) {
                            $imageData = $child->getImageString();
                            if ($imageData === null || $imageData === '') {
                                // Nothing to save; do not emit a URL to an empty file.
                                continue;
                            }

                            // Write the raw bytes directly — no base64 round trip is needed.
                            $filename = uniqid() . '.png';
                            $written = file_put_contents($imageDir . $filename, $imageData);

                            // Only reference the image when it was actually stored,
                            // so the output never contains a dead link.
                            if ($written !== false) {
                                $text .= $imageBaseUrl . $filename;
                            }
                        }
                    }
                    $documentContent[] = $text;
                } elseif ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                    // Start from an empty list for every table so rows from a previous
                    // table are not carried over, and an empty table yields [].
                    $rows = [];
                    foreach ($element->getRows() as $row) {
                        $rows[] = $this->getTableNode($row);
                    }
                    $documentContent[] = $rows;
                }
            }
        }

        return $documentContent;
    } catch (\PhpOffice\PhpWord\Exception\Exception $e) {
        // Kept as a hard stop to preserve the existing contract for callers.
        die('Error loading DOCX file: ' . $e->getMessage());
    }
}
标签:word,filePath,text,element,文档,file,PhpWord,PHP,getElements
From: https://www.cnblogs.com/79524795-Tian/p/17920123.html