webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
Static Public Attributes | List of all members
webkitpy.thirdparty.BeautifulSoup.MinimalSoup Class Reference
Inheritance diagram for webkitpy.thirdparty.BeautifulSoup.MinimalSoup:
webkitpy.thirdparty.BeautifulSoup.BeautifulSoup webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup webkitpy.thirdparty.BeautifulSoup.Tag webkitpy.thirdparty.BeautifulSoup.PageElement webkitpy.thirdparty.BeautifulSoup.RobustInsanelyWackAssHTMLParser

Static Public Attributes

 RESET_NESTING_TAGS = buildTagMap('noscript')
 
dictionary NESTABLE_TAGS = {}
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup
 SELF_CLOSING_TAGS
 
 PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
 
dictionary QUOTE_TAGS = {'script' : None, 'textarea' : None}
 
tuple NESTABLE_INLINE_TAGS
 
tuple NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
 
dictionary NESTABLE_LIST_TAGS
 
dictionary NESTABLE_TABLE_TAGS
 
tuple NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
 
 RESET_NESTING_TAGS
 
 NESTABLE_TAGS
 
 CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
dictionary SELF_CLOSING_TAGS = {}
 
dictionary NESTABLE_TAGS = {}
 
dictionary RESET_NESTING_TAGS = {}
 
dictionary QUOTE_TAGS = {}
 
list PRESERVE_WHITESPACE_TAGS = []
 
list MARKUP_MASSAGE
 
string ROOT_TAG_NAME = u'[document]'
 
string HTML_ENTITIES = "html"
 
string XML_ENTITIES = "xml"
 
string XHTML_ENTITIES = "xhtml"
 
string ALL_ENTITIES = XHTML_ENTITIES
 
dictionary STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
dictionary XML_ENTITIES_TO_SPECIAL_CHARS
 
def XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
 
 BARE_AMPERSAND_OR_BRACKET
 
def findChild = find
 
def findChildren = findAll
 
def first = find
 
def fetch = findAll
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def fetchNextSiblings = findNextSiblings
 
def fetchPrevious = findAllPrevious
 
def fetchPreviousSiblings = findPreviousSiblings
 
def fetchParents = findParents
 

Additional Inherited Members

- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup
def __init__ (self, *args, **kwargs)
 
def start_meta (self, attrs)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
def __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False)
 
def convert_charref (self, name)
 
def __getattr__ (self, methodName)
 
def isSelfClosingTag (self, name)
 
def reset (self)
 
def popTag (self)
 
def pushTag (self, tag)
 
def endData (self, containerClass=NavigableString)
 
def unknown_starttag (self, name, attrs, selfClosing=0)
 
def unknown_endtag (self, name)
 
def handle_data (self, data)
 
def handle_pi (self, text)
 
def handle_comment (self, text)
 
def handle_charref (self, ref)
 
def handle_entityref (self, ref)
 
def handle_decl (self, data)
 
def parse_declaration (self, i)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.Tag
def __init__ (self, parser, name, attrs=None, parent=None, previous=None)
 
def getString (self)
 
def setString (self, string)
 
def getText (self, separator=u"")
 
def get (self, key, default=None)
 
def clear (self)
 
def index (self, element)
 
def has_key (self, key)
 
def __getitem__ (self, key)
 
def __iter__ (self)
 
def __len__ (self)
 
def __contains__ (self, x)
 
def __nonzero__ (self)
 
def __setitem__ (self, key, value)
 
def __delitem__ (self, key)
 
def __call__ (self, *args, **kwargs)
 
def __getattr__ (self, tag)
 
def __eq__ (self, other)
 
def __ne__ (self, other)
 
def __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def __unicode__ (self)
 
def __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def decompose (self)
 
def prettify (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def find (self, name=None, attrs={}, recursive=True, text=None, **kwargs)
 
def findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs)
 
def fetchText (self, text=None, recursive=True, limit=None)
 
def firstText (self, text=None, recursive=True)
 
def childGenerator (self)
 
def recursiveChildGenerator (self)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def setup (self, parent=None, previous=None)
 
def replaceWith (self, replaceWith)
 
def replaceWithChildren (self)
 
def extract (self)
 
def insert (self, position, newChild)
 
def append (self, tag)
 
def findNext (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findNextSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPrevious (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findParent (self, name=None, attrs={}, **kwargs)
 
def findParents (self, name=None, attrs={}, limit=None, **kwargs)
 
def nextGenerator (self)
 
def nextSiblingGenerator (self)
 
def previousGenerator (self)
 
def previousSiblingGenerator (self)
 
def parentGenerator (self)
 
def substituteEncoding (self, str, encoding=None)
 
def toEncoding (self, s, encoding=None)
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulSoup
 originalEncoding
 
 declaredHTMLEncoding
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
 parseOnlyThese
 
 fromEncoding
 
 smartQuotesTo
 
 convertEntities
 
 convertXMLEntities
 
 convertHTMLEntities
 
 escapeUnrecognizedEntities
 
 instanceSelfClosingTags
 
 markup
 
 markupMassage
 
 originalEncoding
 
 declaredHTMLEncoding
 
 hidden
 
 currentData
 
 currentTag
 
 tagStack
 
 quoteStack
 
 previous
 
 literal
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 parserClass
 
 isSelfClosing
 
 name
 
 attrs
 
 contents
 
 hidden
 
 containsSubstitutions
 
 convertHTMLEntities
 
 convertXMLEntities
 
 escapeUnrecognizedEntities
 
 attrMap
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
 parent
 
 previous
 
 next
 
 previousSibling
 
 nextSibling
 
- Properties inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 string = property(getString, setString)
 
 text = property(getText)
 

Detailed Description

The MinimalSoup class is for parsing HTML that contains
pathologically bad markup. It makes no assumptions about tag
nesting, but it does know which tags are self-closing, that
<script> tags contain JavaScript and should not be parsed, that
META tags may contain encoding information, and so on.

This also makes it better for subclassing than BeautifulStoneSoup
or BeautifulSoup.

Member Data Documentation

◆ NESTABLE_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.MinimalSoup.NESTABLE_TAGS = {}
static

◆ RESET_NESTING_TAGS

webkitpy.thirdparty.BeautifulSoup.MinimalSoup.RESET_NESTING_TAGS = buildTagMap('noscript')
static

The documentation for this class was generated from the following file: