Server IP : 103.119.228.120 / Your IP : 18.116.15.22 Web Server : Apache System : Linux v8.techscape8.com 3.10.0-1160.119.1.el7.tuxcare.els2.x86_64 #1 SMP Mon Jul 15 12:09:18 UTC 2024 x86_64 User : nobody ( 99) PHP Version : 5.6.40 Disable Function : shell_exec,symlink,system,exec,proc_get_status,proc_nice,proc_terminate,define_syslog_variables,syslog,openlog,closelog,escapeshellcmd,passthru,ocinum cols,ini_alter,leak,listen,chgrp,apache_note,apache_setenv,debugger_on,debugger_off,ftp_exec,dl,dll,myshellexec,proc_open,socket_bind,proc_close,escapeshellarg,parse_ini_filepopen,fpassthru,exec,passthru,escapeshellarg,escapeshellcmd,proc_close,proc_open,ini_alter,popen,show_source,proc_nice,proc_terminate,proc_get_status,proc_close,pfsockopen,leak,apache_child_terminate,posix_kill,posix_mkfifo,posix_setpgid,posix_setsid,posix_setuid,dl,symlink,shell_exec,system,dl,passthru,escapeshellarg,escapeshellcmd,myshellexec,c99_buff_prepare,c99_sess_put,fpassthru,getdisfunc,fx29exec,fx29exec2,is_windows,disp_freespace,fx29sh_getupdate,fx29_buff_prepare,fx29_sess_put,fx29shexit,fx29fsearch,fx29ftpbrutecheck,fx29sh_tools,fx29sh_about,milw0rm,imagez,sh_name,myshellexec,checkproxyhost,dosyayicek,c99_buff_prepare,c99_sess_put,c99getsource,c99sh_getupdate,c99fsearch,c99shexit,view_perms,posix_getpwuid,posix_getgrgid,posix_kill,parse_perms,parsesort,view_perms_color,set_encoder_input,ls_setcheckboxall,ls_reverse_all,rsg_read,rsg_glob,selfURL,dispsecinfo,unix2DosTime,addFile,system,get_users,view_size,DirFiles,DirFilesWide,DirPrintHTMLHeaders,GetFilesTotal,GetTitles,GetTimeTotal,GetMatchesCount,GetFileMatchesCount,GetResultFiles,fs_copy_dir,fs_copy_obj,fs_move_dir,fs_move_obj,fs_rmdir,SearchText,getmicrotime MySQL : ON | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /home/hendraso/public_html/Additional/Html2Pdf/_class/ |
Upload File : |
<?php /** * HTML2PDF Librairy - parsingHtml class * * HTML => PDF convertor * distributed under the LGPL License * * @author Laurent MINGUET <webmaster@html2pdf.fr> * @version 4.03 */ class HTML2PDF_parsingHtml { protected $_html = ''; // HTML code to parse protected $_num = 0; // table number protected $_level = 0; // table level protected $_encoding = ''; // encoding public $code = array(); // parsed HTML codfe const HTML_TAB = ' '; /** * main constructor * * @param string encoding * @access public */ public function __construct($encoding = 'UTF-8') { $this->_num = 0; $this->_level = array($this->_num); $this->_html = ''; $this->code = array(); $this->setEncoding($encoding); } /** * change the encoding * * @param string encoding * @access public */ public function setEncoding($encoding) { $this->_encoding = $encoding; } /** * Define the HTML code to parse * * @param string HTML code * @access public */ public function setHTML($html) { // remove the HTML in comment $html = preg_replace('/<!--(.*)-->/isU', '', $html); // save the HTML code $this->_html = $html; } /** * parse the HTML code * * @access public */ public function parse() { $parents = array(); // flag : are we in a <pre> Tag ? $tagPreIn = false; // action to use for each line of the content of a <pre> Tag $tagPreBr = array( 'name' => 'br', 'close' => false, 'param' => array( 'style' => array(), 'num' => 0 ) ); // tag that can be not closed $tagsNotClosed = array( 'br', 'hr', 'img', 'col', 'input', 'link', 'option', 'circle', 'ellipse', 'path', 'rect', 'line', 'polygon', 'polyline' ); // search the HTML tags $tmp = array(); $this->_searchCode($tmp); // all the actions to do $actions = array(); // foreach part of the HTML code foreach ($tmp as $part) { // if it is a tag code if ($part[0]=='code') { // analise the HTML code $res = $this->_analiseCode($part[1]); // if it is a real HTML tag if ($res) { // save the current posistion in the HTML code $res['html_pos'] = $part[2]; // if the tag must be closed if (!in_array($res['name'], $tagsNotClosed)) { // if it is a closure tag if ($res['close']) { // HTML validation if (count($parents)<1) throw new HTML2PDF_exception(3, $res['name'], $this->getHtmlErrorCode($res['html_pos'])); else if ($parents[count($parents)-1]!=$res['name']) throw new HTML2PDF_exception(4, $parents, $this->getHtmlErrorCode($res['html_pos'])); else unset($parents[count($parents)-1]); } else { // if it is a autoclosed tag if ($res['autoclose']) { // save the opened tag $actions[] = $res; // prepare the closed tag $res['params'] = array(); $res['close'] = true; } // else :add a child for validation else $parents[count($parents)] = $res['name']; } // if it is a <pre> tag (or <code> tag) not auclosed => update the flag if (($res['name']=='pre' || $res['name']=='code') && !$res['autoclose']) { $tagPreIn = !$res['close']; } } // save the actions to convert $actions[] = $res; } else { // else (it is not a real HTML tag => we transform it in Texte $part[0]='txt'; } } // if it is text if ($part[0]=='txt') { // if we are not in a <pre> tag if (!$tagPreIn) { // save the action $actions[] = array( 'name' => 'write', 'close' => false, 'param' => array('txt' => $this->_prepareTxt($part[1])), ); } else { // else (if we are in a <pre> tag) // prepare the text $part[1] = str_replace("\r", '', $part[1]); $part[1] = explode("\n", $part[1]); // foreach line of the text foreach ($part[1] as $k => $txt) { // transform the line $txt = str_replace("\t", self::HTML_TAB, $txt); $txt = str_replace(' ', ' ', $txt); // add a break line if ($k>0) $actions[] = $tagPreBr; // save the action $actions[] = array( 'name' => 'write', 'close' => false, 'param' => array('txt' => $this->_prepareTxt($txt, false)), ); } } } } // for each indentified action, we have to clean up the begin and the end of the texte // based on tags that surround it // list of the tags to clean $tagsToClean = array( 'page', 'page_header', 'page_footer', 'form', 'table', 'thead', 'tfoot', 'tr', 'td', 'th', 'br', 'div', 'hr', 'p', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'bookmark', 'fieldset', 'legend', 'draw', 'circle', 'ellipse', 'path', 'rect', 'line', 'g', 'polygon', 'polyline', 'option' ); // foreach action $nb = count($actions); for ($k=0; $k<$nb; $k++) { // if it is a Text if ($actions[$k]['name']=='write') { // if the tag before the text is a tag to clean => ltrim on the text if ($k>0 && in_array($actions[$k-1]['name'], $tagsToClean)) $actions[$k]['param']['txt'] = ltrim($actions[$k]['param']['txt']); // if the tag after the text is a tag to clean => rtrim on the text if ($k<$nb-1 && in_array($actions[$k+1]['name'], $tagsToClean)) $actions[$k]['param']['txt'] = rtrim($actions[$k]['param']['txt']); // if the text is empty => remove the action if (!strlen($actions[$k]['param']['txt'])) unset($actions[$k]); } } // if we are not on the level 0 => HTML validator ERROR if (count($parents)) throw new HTML2PDF_exception(5, $parents); // save the actions to do $this->code = array_values($actions); } /** * prepare the text * * @param string texte * @param boolean true => replace multiple space+\t+\r+\n by a single space * @return string texte * @access protected */ protected function _prepareTxt($txt, $spaces = true) { if ($spaces) $txt = preg_replace('/\s+/is', ' ', $txt); $txt = str_replace('€', '€', $txt); $txt = html_entity_decode($txt, ENT_QUOTES, $this->_encoding); return $txt; } /** * parse the HTML code * * @param &array array's result * @return null */ protected function _searchCode(&$tmp) { // initialise the array $tmp = array(); // regexp to separate the tags from the texts $reg = '/(<[^>]+>)|([^<]+)+/isU'; // last match found $str = ''; $offset = 0; // As it finds a match while (preg_match($reg, $this->_html, $parse, PREG_OFFSET_CAPTURE, $offset)) { // if it is a tag if ($parse[1][0]) { // save the previous text if it exists if ($str!=='') $tmp[] = array('txt', $str); // save the tag, with the offset $tmp[] = array('code', trim($parse[1][0]), $offset); // init the current text $str = ''; } else { // else (if it is a text) // add the new text to the current text $str.= $parse[2][0]; } // Update offset to the end of the match $offset = $parse[0][1] + strlen($parse[0][0]); unset($parse); } // if a text is present in the end, we save it if ($str!='') $tmp[] = array('txt', $str); unset($str); } /** * analise a HTML tag * * @param string HTML code to analise * @return array corresponding action */ protected function _analiseCode($code) { // name of the tag, opening, closure, autoclosure $tag = '<([\/]{0,1})([_a-z0-9]+)([\/>\s]+)'; if (!preg_match('/'.$tag.'/isU', $code, $match)) return null; $close = ($match[1]=='/' ? true : false); $autoclose = preg_match('/\/>$/isU', $code); $name = strtolower($match[2]); // required parameters (depends on the tag name) $param = array(); $param['style'] = ''; if ($name=='img') { $param['alt'] = ''; $param['src'] = ''; } if ($name=='a') { $param['href'] = ''; } // read the parameters : nom=valeur $prop = '([a-zA-Z0-9_]+)=([^"\'\s>]+)'; preg_match_all('/'.$prop.'/is', $code, $match); for($k=0; $k<count($match[0]); $k++) $param[trim(strtolower($match[1][$k]))] = trim($match[2][$k]); // read the parameters : nom="valeur" $prop = '([a-zA-Z0-9_]+)=["]([^"]*)["]'; preg_match_all('/'.$prop.'/is', $code, $match); for($k=0; $k<count($match[0]); $k++) $param[trim(strtolower($match[1][$k]))] = trim($match[2][$k]); // read the parameters : nom='valeur' $prop = "([a-zA-Z0-9_]+)=[']([^']*)[']"; preg_match_all('/'.$prop.'/is', $code, $match); for($k=0; $k<count($match[0]); $k++) $param[trim(strtolower($match[1][$k]))] = trim($match[2][$k]); // compliance of each parameter $color = "#000000"; $border = null; foreach ($param as $key => $val) { $key = strtolower($key); switch($key) { case 'width': unset($param[$key]); $param['style'] .= 'width: '.$val.'px; '; break; case 'align': if ($name==='img') { unset($param[$key]); $param['style'] .= 'float: '.$val.'; '; } elseif ($name!=='table') { unset($param[$key]); $param['style'] .= 'text-align: '.$val.'; '; } break; case 'valign': unset($param[$key]); $param['style'] .= 'vertical-align: '.$val.'; '; break; case 'height': unset($param[$key]); $param['style'] .= 'height: '.$val.'px; '; break; case 'bgcolor': unset($param[$key]); $param['style'] .= 'background: '.$val.'; '; break; case 'bordercolor': unset($param[$key]); $color = $val; break; case 'border': unset($param[$key]); if (preg_match('/^[0-9]+$/isU', $val)) $val = $val.'px'; $border = $val; break; case 'cellpadding': case 'cellspacing': if (preg_match('/^([0-9]+)$/isU', $val)) $param[$key] = $val.'px'; break; case 'colspan': case 'rowspan': $val = preg_replace('/[^0-9]/isU', '', $val); if (!$val) $val = 1; $param[$key] = $val; break; } } // compliance of the border if ($border!==null) { if ($border) $border = 'border: solid '.$border.' '.$color; else $border = 'border: none'; $param['style'] .= $border.'; '; $param['border'] = $border; } // reading styles: decomposition and standardization $styles = explode(';', $param['style']); $param['style'] = array(); foreach ($styles as $style) { $tmp = explode(':', $style); if (count($tmp)>1) { $cod = $tmp[0]; unset($tmp[0]); $tmp = implode(':', $tmp); $param['style'][trim(strtolower($cod))] = preg_replace('/[\s]+/isU', ' ', trim($tmp)); } } // determining the level of table opening, with an added level if (in_array($name, array('ul', 'ol', 'table')) && !$close) { $this->_num++; $this->_level[count($this->_level)] = $this->_num; } // get the level of the table containing the element if (!isset($param['num'])) { $param['num'] = $this->_level[count($this->_level)-1]; } // for closures table: remove a level if (in_array($name, array('ul', 'ol', 'table')) && $close) { unset($this->_level[count($this->_level)-1]); } // prepare the parameters if (isset($param['value'])) $param['value'] = $this->_prepareTxt($param['value']); if (isset($param['alt'])) $param['alt'] = $this->_prepareTxt($param['alt']); if (isset($param['title'])) $param['title'] = $this->_prepareTxt($param['title']); if (isset($param['class'])) $param['class'] = $this->_prepareTxt($param['class']); // return the new action to do return array('name' => $name, 'close' => $close ? 1 : 0, 'autoclose' => $autoclose, 'param' => $param); } /** * get a full level of HTML, between an opening and closing corresponding * * @param integer key * @return array actions */ public function getLevel($k) { // if the code does not exist => return empty if (!isset($this->code[$k])) return array(); // the tag to detect $detect = $this->code[$k]['name']; // if it is a text => return if ($detect=='write') { return array($this->code[$k]); } // $level = 0; // depth level $end = false; // end of the search $code = array(); // extract code // while it's not ended while (!$end) { // current action $row = $this->code[$k]; // if 'write' => we add the text if ($row['name']=='write') { $code[] = $row; } else { // else, it is a html tag $not = false; // flag for not taking into account the current tag // if it is the searched tag if ($row['name']==$detect) { // if we are just at the root level => dont take it if ($level==0) { $not = true; } // update the level $level+= ($row['close'] ? -1 : 1); // if we are now at the root level => it is the end, and dont take it if ($level==0) { $not = true; $end = true; } } // if we can takin into account the current tag => save it if (!$not) { if (isset($row['style']['text-align'])) unset($row['style']['text-align']); $code[] = $row; } } // it continues as long as there has code to analise if (isset($this->code[$k+1])) $k++; else $end = true; } // return the extract return $code; } /** * return a part of the HTML code, for error message * * @param integer position * @param integer take before * @param integer take after * @return string part of the html code */ public function getHtmlErrorCode($pos, $before=30, $after=40) { return substr($this->_html, $pos-$before, $before+$after); } }