Server IP : 103.119.228.120 / Your IP : 18.222.166.127 Web Server : Apache System : Linux v8.techscape8.com 3.10.0-1160.119.1.el7.tuxcare.els2.x86_64 #1 SMP Mon Jul 15 12:09:18 UTC 2024 x86_64 User : nobody ( 99) PHP Version : 5.6.40 Disable Function : shell_exec,symlink,system,exec,proc_get_status,proc_nice,proc_terminate,define_syslog_variables,syslog,openlog,closelog,escapeshellcmd,passthru,ocinum cols,ini_alter,leak,listen,chgrp,apache_note,apache_setenv,debugger_on,debugger_off,ftp_exec,dl,dll,myshellexec,proc_open,socket_bind,proc_close,escapeshellarg,parse_ini_filepopen,fpassthru,exec,passthru,escapeshellarg,escapeshellcmd,proc_close,proc_open,ini_alter,popen,show_source,proc_nice,proc_terminate,proc_get_status,proc_close,pfsockopen,leak,apache_child_terminate,posix_kill,posix_mkfifo,posix_setpgid,posix_setsid,posix_setuid,dl,symlink,shell_exec,system,dl,passthru,escapeshellarg,escapeshellcmd,myshellexec,c99_buff_prepare,c99_sess_put,fpassthru,getdisfunc,fx29exec,fx29exec2,is_windows,disp_freespace,fx29sh_getupdate,fx29_buff_prepare,fx29_sess_put,fx29shexit,fx29fsearch,fx29ftpbrutecheck,fx29sh_tools,fx29sh_about,milw0rm,imagez,sh_name,myshellexec,checkproxyhost,dosyayicek,c99_buff_prepare,c99_sess_put,c99getsource,c99sh_getupdate,c99fsearch,c99shexit,view_perms,posix_getpwuid,posix_getgrgid,posix_kill,parse_perms,parsesort,view_perms_color,set_encoder_input,ls_setcheckboxall,ls_reverse_all,rsg_read,rsg_glob,selfURL,dispsecinfo,unix2DosTime,addFile,system,get_users,view_size,DirFiles,DirFilesWide,DirPrintHTMLHeaders,GetFilesTotal,GetTitles,GetTimeTotal,GetMatchesCount,GetFileMatchesCount,GetResultFiles,fs_copy_dir,fs_copy_obj,fs_move_dir,fs_move_obj,fs_rmdir,SearchText,getmicrotime MySQL : ON | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /usr/local/ssl/local/ssl/local/ssl/local/ssl/lib64/python2.7/site-packages/lxml/html/ |
Upload File : |
__doc__ = """External interface to the BeautifulSoup HTML parser. """ __all__ = ["fromstring", "parse", "convert_tree"] from lxml import etree, html from BeautifulSoup import \ BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs): """Parse a string of HTML data into an Element tree using the BeautifulSoup parser. Returns the root ``<html>`` Element of the tree. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. """ return _parse(data, beautifulsoup, makeelement, **bsargs) def parse(file, beautifulsoup=None, makeelement=None, **bsargs): """Parse a file into an ElemenTree using the BeautifulSoup parser. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. """ if not hasattr(file, 'read'): file = open(file) root = _parse(file, beautifulsoup, makeelement, **bsargs) return etree.ElementTree(root) def convert_tree(beautiful_soup_tree, makeelement=None): """Convert a BeautifulSoup tree to a list of Element trees. Returns a list instead of a single root Element to support HTML-like soup with more than one root element. You can pass a different Element factory through the `makeelement` keyword. """ if makeelement is None: makeelement = html.html_parser.makeelement root = _convert_tree(beautiful_soup_tree, makeelement) children = root.getchildren() for child in children: root.remove(child) return children # helpers def _parse(source, beautifulsoup, makeelement, **bsargs): if beautifulsoup is None: beautifulsoup = BeautifulSoup if makeelement is None: makeelement = html.html_parser.makeelement if 'convertEntities' not in bsargs: bsargs['convertEntities'] = 'html' tree = beautifulsoup(source, **bsargs) root = _convert_tree(tree, makeelement) # from ET: wrap the document in a html root element, if necessary if len(root) == 1 and root[0].tag == "html": return root[0] root.tag = "html" return root def _convert_tree(beautiful_soup_tree, makeelement): root = makeelement(beautiful_soup_tree.name, attrib=dict(beautiful_soup_tree.attrs)) _convert_children(root, beautiful_soup_tree, makeelement) return root def _convert_children(parent, beautiful_soup_tree, makeelement): SubElement = etree.SubElement et_child = None for child in beautiful_soup_tree: if isinstance(child, Tag): et_child = SubElement(parent, child.name, attrib=dict( [(k, unescape(v)) for (k,v) in child.attrs])) _convert_children(et_child, child, makeelement) elif type(child) is NavigableString: _append_text(parent, et_child, unescape(child)) else: if isinstance(child, Comment): parent.append(etree.Comment(child)) elif isinstance(child, ProcessingInstruction): parent.append(etree.ProcessingInstruction( *child.split(' ', 1))) else: # CData _append_text(parent, et_child, unescape(child)) def _append_text(parent, element, text): if element is None: parent.text = (parent.text or '') + text else: element.tail = (element.tail or '') + text # copied from ET's ElementSoup try: from html.entities import name2codepoint # Python 3 except ImportError: from htmlentitydefs import name2codepoint import re handle_entities = re.compile("&(\w+);").sub def unescape(string): if not string: return '' # work around oddities in BeautifulSoup's entity handling def unescape_entity(m): try: return unichr(name2codepoint[m.group(1)]) except KeyError: return m.group(0) # use as is return handle_entities(unescape_entity, string)