您好,登錄后才能下訂單哦!
這篇文章主要為大家詳細介紹了使用php怎么將HTML頁面轉換成word并保存,文中示例代碼介紹的非常詳細,具有一定的參考價值,發現的小伙伴們可以參考一下:
php是一個嵌套的縮寫名稱,指的是英文超級文本預處理語言(php:Hypertext Preprocessor)的縮寫,它的語法混合了C、Java、Perl以及php自創新的語法,主要用來做網站開發,許多小型網站都用php開發,因為php是開源的,從而使得php經久不衰。
功能說明:
20150507 — HTML中的<p>標簽和<ol>列表標簽的獲取
20150508 — 新增獲取文章中的圖片功能
20150509 — 新增行間距,并且過濾一下錯誤圖片
20150514 — 新增表格處理,并且將代碼改成面向對象
20150519 — 新增GD庫處理網絡圖片
require_once 'PHPWord.php';
require_once 'SimpleHtmlDom.class.php';

/**
 * Converts an HTML page into a Word (.docx) document.
 *
 * The constructor does all the work: it loads the page with simple_html_dom,
 * collects the elements matched by $Allowtag (paragraphs, lists, tables and
 * the links/images nested inside them), downloads referenced images next to
 * the current working directory, builds a PHPWord document from the collected
 * entries, saves it and streams it to the client as a download.
 */
class Word
{
    /** Source page URL as passed to the constructor. */
    private $url;

    /**
     * Collected content. The string key "table" holds the parsed table rows;
     * integer keys hold one array per leaf element ("tag", "text", ...).
     */
    private $LinetextArr = array();

    /** "scheme://host" prefix of the source page, used to absolutize image URLs. */
    public $host = '';

    /** Directory downloaded images are written to (CurrentDir/<image host>). */
    public $ImgDir = '';

    /** Working directory captured at construction time. */
    public $CurrentDir;

    /** Errors collected while parsing/downloading (arrays from error_get_last()). */
    public $error = array();

    /** Output file name; derived from the page host when left empty. */
    public $filename = null;

    /** simple_html_dom selector listing the top-level tags that are processed. */
    public $Allowtag = "p,ol,ul,table";

    /** Statistics **/
    public $DownImg = 0;              // number of images downloaded
    public $expendTime = 0;           // parsing time in seconds
    public $HttpRequestTime = 0;      // number of HTTP requests issued
    public $ContentLen = 0;
    public $HttpRequestArr = array(); // every URL that was requested
    public $expendmemory = 0;         // memory growth, bytes / 1000 (reported as KB)

    /**
     * Loads and parses the page, then builds and sends the Word document.
     *
     * @param string $url Page to convert.
     */
    public function __construct($url)
    {
        $startTime = $this->_Time();
        $startMemory = $this->_memory();

        $this->url = $url;
        $UrlArr = parse_url($this->url);
        if (is_array($UrlArr) && isset($UrlArr['scheme'], $UrlArr['host'])) {
            $this->host = $UrlArr['scheme']."://".$UrlArr['host'];
        }
        $this->CurrentDir = getcwd();
        $this->LinetextArr["table"] = array();

        $html = new simple_html_dom($this->url);
        $this->HttpRequestArr[] = $this->url;
        $this->HttpRequestTime++;

        foreach ($html->find($this->Allowtag) as $value) {
            if ($value->tag == "table") {
                // Start each table after the rows already collected so that
                // several tables on one page do not merge into the same rows.
                $rows = $this->LinetextArr["table"];
                $startRow = empty($rows) ? 0 : max(array_keys($rows)) + 1;
                $this->ParseTable($value, $startRow, $this->LinetextArr["table"]);
            } else {
                $this->AnalysisHtmlDom($value);
            }
            $this->recordLastError();
        }

        $endTime = $this->_Time();
        $endMemory = $this->_memory();
        $this->expendTime = round(($endTime - $startTime), 2);               // seconds
        $this->expendmemory = round(($endMemory - $startMemory) / 1000, 2);  // KB (bytes / 1000)

        $this->CreateWordDom();
    }

    /**
     * Appends the most recent PHP error to $error, skipping "no error" and
     * an error that is identical to the one recorded last.
     */
    private function recordLastError()
    {
        $last = error_get_last();
        if ($last === null) {
            return;
        }
        $count = count($this->error);
        if ($count > 0 && $this->error[$count - 1] === $last) {
            return;
        }
        $this->error[] = $last;
    }

    /** Current time in seconds, with microsecond precision. */
    private function _Time()
    {
        return microtime(true);
    }

    /** Memory currently allocated to the script, in bytes. */
    private function _memory()
    {
        return memory_get_usage();
    }

    /**
     * Parses a table node into rows of cells, descending through
     * table/tbody/thead/tfoot/tr wrappers.
     *
     * @param object $value simple_html_dom node being visited
     * @param int    $i     Row index the cells of this node are stored under
     * @param array  $Arr   Row accumulator, filled in place (passed by reference)
     */
    private function ParseTable($value, $i, &$Arr)
    {
        $first = $value->firstChild();
        $containers = array("table", "tbody", "thead", "tfoot", "tr");

        if ($first && in_array($first->tag, $containers, true)) {
            // Structural wrapper: recurse, handing each child its own row index.
            foreach ($value->children as $child) {
                $this->ParseTable($child, $i++, $Arr);
            }
            return;
        }

        foreach ($value->children as $cell) {
            $cellFirst = $cell->firstChild();
            // A cell whose first child is a nested table is skipped here.
            if ($cellFirst && $cellFirst->tag == "table") {
                continue;
            }
            $Arr[$i][] = array("tag" => $cell->tag, "text" => trim($cell->plaintext));
        }
    }

    /**
     * Walks a node recursively and records every leaf element (links, images,
     * plain text elements) in $LinetextArr.
     *
     * @param object $value simple_html_dom node
     */
    private function AnalysisHtmlDom($value)
    {
        if ($value->has_child()) {
            foreach ($value->children as $child) {
                $this->AnalysisHtmlDom($child);
            }
            return;
        }

        $tmp = array();
        if ($value->tag == "a") {
            $tmp = array("tag" => $value->tag, "href" => $value->href, "text" => $value->innertext);
        } else if ($value->tag == "img") {
            $src = $this->unescape($value->src);
            $UrlArr = parse_url($src);
            if (!is_array($UrlArr) || !isset($UrlArr['host'])) {
                // Relative URL: resolve it against the page's scheme and host.
                $src = $this->host."/".ltrim($src, "/");
                $UrlArr = parse_url($src);
            }
            // Download the image; false means it is unsupported or unreachable.
            $src = is_array($UrlArr) ? $this->getImageFromNet($src, $UrlArr) : false;
            if ($src) {
                $imgsArr = $this->GD($src);
                // Images whose size cannot be read are dropped.
                if ($imgsArr !== false) {
                    $tmp = array(
                        "tag" => $value->tag,
                        "src" => $src,
                        "text" => $value->alt,
                        "width" => $imgsArr['width'],
                        "height" => $imgsArr['height'],
                    );
                }
            }
        } else {
            $tmp = array("tag" => $value->tag, "text" => strip_tags($value->innertext));
        }

        // Do not record placeholders for images that were skipped.
        if (!empty($tmp)) {
            $this->LinetextArr[] = $tmp;
        }
    }

    /**
     * Reads an image's dimensions and halves them when either side exceeds
     * 800 pixels.
     *
     * @param string $src Local image path
     * @return array|false array("width"=>..., "height"=>...), or false when
     *                     the file is not a readable image
     */
    private function GD($src)
    {
        $size = getimagesize($src);
        if ($size === false) {
            $this->recordLastError();
            return false;
        }

        $width = $size[0];
        $height = $size[1];
        if ($width > 800 || $height > 800) {
            $width = $width / 2;
            $height = $height / 2;
        }
        return array("width" => $width, "height" => $height);
    }

    /**
     * Decodes percent-encoding plus %uXXXX, &#xXXXX; and &#NNN; escapes back
     * into UTF-8 characters.
     *
     * @param string $str Escaped string
     * @return string
     */
    public function unescape($str)
    {
        $str = rawurldecode($str);
        preg_match_all("/(?:%u.{4})|&#x.{4};|&#\d+;|.+/U", $str, $r);
        $ar = $r[0];
        foreach ($ar as $k => $v) {
            if (substr($v, 0, 2) == "%u") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("H4", substr($v, -4)));
            } elseif (substr($v, 0, 3) == "&#x") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("H4", substr($v, 3, -1)));
            } elseif (substr($v, 0, 2) == "&#") {
                $ar[$k] = iconv("UCS-2BE", "UTF-8", pack("n", substr($v, 2, -1)));
            }
        }
        return join("", $ar);
    }

    /**
     * Downloads an image into CurrentDir/<host>/ under an md5-based name.
     *
     * @param string $Src    Absolute image URL
     * @param array  $UrlArr parse_url() result for $Src
     * @return string|false Path relative to the working directory
     *                      ("<host>/<md5>.<ext>"), or false when the image is
     *                      unsupported or could not be fetched/stored
     */
    private function getImageFromNet($Src, $UrlArr)
    {
        if (!isset($UrlArr['host'], $UrlArr['path'])) {
            return false;
        }

        $_supportedImageTypes = array('jpg', 'jpeg', 'gif', 'png', 'bmp', 'tif', 'tiff');
        // Use the real (last) extension, case-insensitively: "a.b.JPG" is a jpg.
        $extension = strtolower(pathinfo($UrlArr['path'], PATHINFO_EXTENSION));
        if (!in_array($extension, $_supportedImageTypes, true)) {
            return false;
        }

        $this->ImgDir = $this->CurrentDir."/".$UrlArr['host'];

        $file = file_get_contents($Src);
        $this->HttpRequestArr[] = $Src;
        $this->HttpRequestTime++;
        if ($file === false) {
            $this->recordLastError();
            return false;
        }

        $this->_mkdir(); // creates the directory, or records the error
        $imgName = md5($UrlArr['path']).".".$extension;
        if (file_put_contents($this->ImgDir."/".$imgName, $file) === false) {
            $this->recordLastError();
            return false;
        }

        $this->DownImg++;
        return $UrlArr['host']."/".$imgName;
    }

    /**
     * Creates the image directory when it does not exist yet; a failure is
     * recorded in $error.
     */
    private function _mkdir()
    {
        if (is_dir($this->ImgDir)) {
            return;
        }
        // The mode must be an octal integer; the effective mode is still
        // subject to the process umask.
        if (!mkdir($this->ImgDir, 0777, true) && !is_dir($this->ImgDir)) {
            $this->recordLastError();
        }
    }

    /**
     * Builds the PHPWord document from the collected entries and sends it.
     */
    private function CreateWordDom()
    {
        $PHPWord = new PHPWord();
        $PHPWord->setDefaultFontName('宋體');
        $PHPWord->setDefaultFontSize("11");
        $styleTable = array('borderSize' => 6, 'borderColor' => '006699', 'cellMargin' => 120);

        // New portrait section
        $section = $PHPWord->createSection();
        $section->addText($this->Details(), array(), array('spacing' => 120));

        foreach ($this->LinetextArr as $key => $lineArr) {
            if (isset($lineArr['tag'])) {
                if ($lineArr['tag'] == "li") {
                    $section->addListItem($lineArr['text'], 0, "", "", array('spacing' => 120));
                } else if ($lineArr['tag'] == "img") {
                    $section->addImage(
                        $lineArr['src'],
                        array('width' => $lineArr['width'], 'height' => $lineArr['height'], 'align' => 'center')
                    );
                } else if ($lineArr['tag'] == "p") {
                    $section->addText($lineArr['text'], array(), array('spacing' => 120));
                }
            } else if ($key === "table" && !empty($lineArr)) {
                // Strict comparison: before PHP 8 the integer key 0 compares
                // loosely equal to "table".
                $PHPWord->addTableStyle('myOwnTableStyle', $styleTable);
                $table = $section->addTable("myOwnTableStyle");
                foreach ($lineArr as $tr) {
                    $table->addRow();
                    foreach ($tr as $td) {
                        $table->addCell(2000)->addText($td['text']);
                    }
                }
            }
        }

        $this->downFile($PHPWord);
    }

    /**
     * Summary line (request count, downloaded images, time, memory) that is
     * written at the top of the generated document.
     *
     * @return string
     */
    public function Details()
    {
        $msg = "一共請求:{$this->HttpRequestTime}次,共下載的圖片有{$this->DownImg}張,并且下載完成大約使用時間:{$this->expendTime}秒,整個程序執行大約消耗內存是:{$this->expendmemory}KB,";
        return $msg;
    }

    /**
     * Saves the document to $filename (default "<page host>.docx") and streams
     * it to the client as an attachment.
     *
     * @param PHPWord $PHPWord Document to save
     */
    public function downFile($PHPWord)
    {
        if (empty($this->filename)) {
            $UrlArr = parse_url($this->url);
            $host = (is_array($UrlArr) && isset($UrlArr['host'])) ? $UrlArr['host'] : 'document';
            $this->filename = $host.".docx";
        }

        // Save File
        $objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007');
        $objWriter->save($this->filename);

        header("Pragma: public");
        header("Expires: 0");
        header("Cache-Control: must-revalidate, post-check=0, pre-check=0");
        header("Cache-Control: public");
        header("Content-Description: File Transfer");
        header('Content-type: application/msword'); // output type
        // Force the download
        $header = "Content-Disposition: attachment; filename=".$this->filename.";";
        header($header);

        if (is_readable($this->filename)) {
            readfile($this->filename);
        } else {
            $this->recordLastError();
        }
    }
}
上面的代碼重點感覺不是word生成,而是Simplehtmldom的使用,這是一個開源的HTML解析器,之前有提到,這幾天在看他的代碼,
引出了兩個學習方向
① 正則表達式
② 這個擴展的函數整理
看源代碼的收獲:
PHP的異常是可以捕獲的,而且PHP的錯誤也是可以捕獲的。
error_get_last() //用這個函數可以捕獲頁面中的PHP錯誤,不謝。
以上就是億速云小編為大家收集整理的使用php怎么將HTML頁面轉換成word并保存,如果覺得億速云網站的內容還不錯,歡迎將億速云網站推薦給身邊好友。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。