这里以提取亚马逊日期范围报告（PDF）中的汇总数据为例。
根据路径下载PDF
/**
 * Download a file and store it in a local directory.
 *
 * The target file is overwritten when it already exists, so a repeated
 * download never appends to (and corrupts) an earlier copy.
 *
 * @param string $url      Source URL; must not be blank.
 * @param string $save_dir Target directory, "./" when blank. Created recursively when missing.
 * @param string $filename Target file name. When blank, the base name of the URL path is used.
 * @param int    $type     Truthy: fetch with cURL. Falsy: fetch through PHP's stream wrappers.
 *
 * @return array|false Array with the keys status, file_name, save_path and file_size (bytes)
 *                     on success; false when the input is invalid, the directory cannot be
 *                     created, the download fails, or the file cannot be written.
 */
public function getFile($url, $save_dir = '', $filename = '', $type = 0)
{
    if (trim($url) === '') {
        return false;
    }

    if (trim($save_dir) === '') {
        $save_dir = './';
    }
    // Append a separator only when it is missing. The former strrpos() test compared
    // the position of the last "/" with 0, which turned "dir/" into "dir//".
    if (substr($save_dir, -1) !== '/') {
        $save_dir .= '/';
    }

    // Without a file name the write would target the directory itself, so derive one
    // from the URL path and give up when that is not possible either.
    if (trim($filename) === '') {
        $filename = basename((string) parse_url($url, PHP_URL_PATH));
        if ($filename === '') {
            return false;
        }
    }

    // Create the target directory; the trailing is_dir() tolerates a concurrent mkdir.
    if (!is_dir($save_dir) && !mkdir($save_dir, 0777, true) && !is_dir($save_dir)) {
        return false;
    }

    // Fetch the remote content with the requested transport.
    if ($type) {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
        $content = curl_exec($ch);
        curl_close($ch);
    } else {
        $content = file_get_contents($url);
    }

    // Both transports yield false on failure; do not report success for a failed download.
    if ($content === false) {
        return false;
    }

    $size = strlen($content); // size in bytes
    $save_path = $save_dir . $filename;

    // Warnings stay suppressed as in the original fopen/fwrite calls, but the result is
    // now checked so a failed or partial write is reported to the caller.
    $written = @file_put_contents($save_path, $content);
    if ($written === false || $written !== $size) {
        return false;
    }

    return array(
        'status' => 1,
        'file_name' => $filename,
        'save_path' => $save_path,
        'file_size' => $size
    );
}
服务器需要开启 shell_exec 函数（未被 disable_functions 禁用），并已安装 pdftotext 命令行工具。
// Convert the PDF into a text file with the pdftotext command-line tool;
// the -layout option is passed so the text keeps the page layout.
$command = "pdftotext -layout GAN-IT_242_510181.pdf GAN-IT_242_510181.txt";
shell_exec($command);
得到按行解析的txt
接着提取 txt 文件第 11 行到第 16 行的数据
// Local base name of the report: "<account name>_<account id>_<plan id>".
$filename = $accountnamelist[$val['accountid']] . '_' . $val['accountid'] . '_' . $val['planid'];
$pdfPath = $url . $filename . '.pdf';
$txtPath = $url . $filename . '.txt';

// Remove leftovers from an earlier run. A stale .txt in particular would make the
// existence check below succeed even though the conversion failed this time.
if (file_exists($pdfPath)) {
    @unlink($pdfPath);
}
if (file_exists($txtPath)) {
    @unlink($txtPath);
}

// Download the PDF into the working directory.
$res = $modelre->getFile($val['url'], $url, $filename . '.pdf');

// Drop the rows previously stored for this account and batch; they are replaced below.
$modelpdf->deleteAll('account_id=:account_id and batchnumber=:bn', [':account_id' => $val['accountid'], ':bn' => $val['batchnumber']]);

if ($res === false) {
    return false;
}

// Convert the PDF to text. The paths contain the account name, so each one is escaped
// to keep spaces or shell metacharacters from breaking (or extending) the command.
shell_exec('pdftotext -layout ' . escapeshellarg($pdfPath) . ' ' . escapeshellarg($txtPath));
if (!file_exists($txtPath)) {
    return false;
}

$sum = 0;
$dlist = [];
// NOTE(review): readTXT() is defined elsewhere; this assumes it returns the file's
// lines keyed from 0, so keys 10..15 are lines 11..16 - confirm.
$content = $modelre->readTXT($txtPath);
foreach ($content as $keyp => $valp) {
    $line = trim($valp);
    if ($keyp >= 10 && $keyp <= 15 && $line) {
        // Split the columns on runs of two or more whitespace characters. Splitting
        // directly (instead of replacing with "_" and exploding on "_") keeps
        // literal underscores inside a column intact.
        $reslist = preg_split('/\s{2,}/u', $line);
        if ($reslist === false) {
            // The pattern could not be applied (e.g. invalid UTF-8); keep the raw line.
            $reslist = [$line];
        }
        // The third column holds the amount; strip thousands separators before summing.
        $sub = isset($reslist[2]) ? str_replace(',', '', $reslist[2]) : '0';
        $sum += (float) $sub;
        $dlist[] = $reslist;
    }
}

// Store the extracted summary for this account and batch.
$data = [
    'account_id' => $val['accountid'],
    'account_name' => $accountnamelist[$val['accountid']],
    'description' => json_encode($dlist),
    'total_price' => $sum,
    'url' => '/upload/pdflabel/' . $filename . '.pdf',
    'batchnumber' => $val['batchnumber'],
    'create_time' => date('Y-m-d H:i:s'),
];
$modelpdf->batchReplaceAll("{{amazon_report_zn_pdf}}", array_keys($data), [$data]);

// Mark the report as downloaded; the id is bound as a parameter, matching deleteAll above.
$znmodel->updateAll(['is_down' => 1, 'update_at' => date("Y-m-d H:i:s")], 'id=:id', [':id' => $val['id']]);

// The intermediate text file is no longer needed.
@unlink($txtPath);
return true;
标签:save,提取,url,filename,content,txt,PDF,PHP,dir From: https://www.cnblogs.com/xiangshihua/p/17440749.html