webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
webkitpy.thirdparty.BeautifulSoup.BeautifulSoup Class Reference
Inheritance diagram for webkitpy.thirdparty.BeautifulSoup.BeautifulSoup:
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup webkitpy.thirdparty.BeautifulSoup.Tag webkitpy.thirdparty.BeautifulSoup.PageElement webkitpy.thirdparty.BeautifulSoup.ICantBelieveItsBeautifulSoup webkitpy.thirdparty.BeautifulSoup.MinimalSoup webkitpy.thirdparty.BeautifulSoup.RobustHTMLParser webkitpy.thirdparty.BeautifulSoup.RobustWackAssHTMLParser webkitpy.thirdparty.BeautifulSoup.RobustInsanelyWackAssHTMLParser

Public Member Functions

def __init__ (self, *args, **kwargs)
 
def start_meta (self, attrs)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
def __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False)
 
def convert_charref (self, name)
 
def __getattr__ (self, methodName)
 
def isSelfClosingTag (self, name)
 
def reset (self)
 
def popTag (self)
 
def pushTag (self, tag)
 
def endData (self, containerClass=NavigableString)
 
def unknown_starttag (self, name, attrs, selfClosing=0)
 
def unknown_endtag (self, name)
 
def handle_data (self, data)
 
def handle_pi (self, text)
 
def handle_comment (self, text)
 
def handle_charref (self, ref)
 
def handle_entityref (self, ref)
 
def handle_decl (self, data)
 
def parse_declaration (self, i)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.Tag
def __init__ (self, parser, name, attrs=None, parent=None, previous=None)
 
def getString (self)
 
def setString (self, string)
 
def getText (self, separator=u"")
 
def get (self, key, default=None)
 
def clear (self)
 
def index (self, element)
 
def has_key (self, key)
 
def __getitem__ (self, key)
 
def __iter__ (self)
 
def __len__ (self)
 
def __contains__ (self, x)
 
def __nonzero__ (self)
 
def __setitem__ (self, key, value)
 
def __delitem__ (self, key)
 
def __call__ (self, *args, **kwargs)
 
def __getattr__ (self, tag)
 
def __eq__ (self, other)
 
def __ne__ (self, other)
 
def __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def __unicode__ (self)
 
def __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def decompose (self)
 
def prettify (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def find (self, name=None, attrs={}, recursive=True, text=None, **kwargs)
 
def findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs)
 
def fetchText (self, text=None, recursive=True, limit=None)
 
def firstText (self, text=None, recursive=True)
 
def childGenerator (self)
 
def recursiveChildGenerator (self)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def setup (self, parent=None, previous=None)
 
def replaceWith (self, replaceWith)
 
def replaceWithChildren (self)
 
def extract (self)
 
def insert (self, position, newChild)
 
def append (self, tag)
 
def findNext (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findNextSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPrevious (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findParent (self, name=None, attrs={}, **kwargs)
 
def findParents (self, name=None, attrs={}, limit=None, **kwargs)
 
def nextGenerator (self)
 
def nextSiblingGenerator (self)
 
def previousGenerator (self)
 
def previousSiblingGenerator (self)
 
def parentGenerator (self)
 
def substituteEncoding (self, str, encoding=None)
 
def toEncoding (self, s, encoding=None)
 

Public Attributes

 originalEncoding
 
 declaredHTMLEncoding
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
 parseOnlyThese
 
 fromEncoding
 
 smartQuotesTo
 
 convertEntities
 
 convertXMLEntities
 
 convertHTMLEntities
 
 escapeUnrecognizedEntities
 
 instanceSelfClosingTags
 
 markup
 
 markupMassage
 
 originalEncoding
 
 declaredHTMLEncoding
 
 hidden
 
 currentData
 
 currentTag
 
 tagStack
 
 quoteStack
 
 previous
 
 literal
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 parserClass
 
 isSelfClosing
 
 name
 
 attrs
 
 contents
 
 hidden
 
 containsSubstitutions
 
 convertHTMLEntities
 
 convertXMLEntities
 
 escapeUnrecognizedEntities
 
 attrMap
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
 parent
 
 previous
 
 next
 
 previousSibling
 
 nextSibling
 

Static Public Attributes

 SELF_CLOSING_TAGS
 
 PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
 
dictionary QUOTE_TAGS = {'script' : None, 'textarea' : None}
 
tuple NESTABLE_INLINE_TAGS
 
tuple NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
 
dictionary NESTABLE_LIST_TAGS
 
dictionary NESTABLE_TABLE_TAGS
 
tuple NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
 
 RESET_NESTING_TAGS
 
 NESTABLE_TAGS
 
 CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup
dictionary SELF_CLOSING_TAGS = {}
 
dictionary NESTABLE_TAGS = {}
 
dictionary RESET_NESTING_TAGS = {}
 
dictionary QUOTE_TAGS = {}
 
list PRESERVE_WHITESPACE_TAGS = []
 
list MARKUP_MASSAGE
 
string ROOT_TAG_NAME = u'[document]'
 
string HTML_ENTITIES = "html"
 
string XML_ENTITIES = "xml"
 
string XHTML_ENTITIES = "xhtml"
 
string ALL_ENTITIES = XHTML_ENTITIES
 
dictionary STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
dictionary XML_ENTITIES_TO_SPECIAL_CHARS
 
def XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
 
 BARE_AMPERSAND_OR_BRACKET
 
def findChild = find
 
def findChildren = findAll
 
def first = find
 
def fetch = findAll
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def fetchNextSiblings = findNextSiblings
 
def fetchPrevious = findAllPrevious
 
def fetchPreviousSiblings = findPreviousSiblings
 
def fetchParents = findParents
 

Additional Inherited Members

- Properties inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 string = property(getString, setString)
 
 text = property(getText)
 

Detailed Description

This parser knows the following facts about HTML:

* Some tags have no closing tag and should be interpreted as being
  closed as soon as they are encountered.

* The text inside some tags (e.g. 'script') may contain tags which
  are not really part of the document and which should be parsed
  as text, not tags. If you want to parse the text as tags, you can
  always fetch it and parse it explicitly.

* Tag nesting rules:

  Most tags can't be nested at all. For instance, the occurrence of
  a <p> tag should implicitly close the previous <p> tag.

   <p>Para1<p>Para2
    should be transformed into:
   <p>Para1</p><p>Para2

  Some tags can be nested arbitrarily. For instance, the occurrence
  of a <blockquote> tag should _not_ implicitly close the previous
  <blockquote> tag.

   Alice said: <blockquote>Bob said: <blockquote>Blah
    should NOT be transformed into:
   Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah

  Some tags can be nested, but the nesting is reset by the
  interposition of other tags. For instance, a <tr> tag should
  implicitly close the previous <tr> tag within the same <table>,
  but not close a <tr> tag in another table.

   <table><tr>Blah<tr>Blah
    should be transformed into:
   <table><tr>Blah</tr><tr>Blah
    but,
   <tr>Blah<table><tr>Blah
    should NOT be transformed into:
   <tr>Blah<table></tr><tr>Blah

Differing assumptions about tag nesting rules are a major source
of problems with the BeautifulSoup class. If BeautifulSoup is not
treating as nestable a tag your page author treats as nestable,
try ICantBelieveItsBeautifulSoup, MinimalSoup, or
BeautifulStoneSoup before writing your own subclass.

Constructor & Destructor Documentation

◆ __init__()

def webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.__init__ (   self,
  *args,
  **kwargs 
)

Member Function Documentation

◆ start_meta()

def webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.start_meta (   self,
  attrs 
)
Beautiful Soup can detect a charset included in a META tag,
try to convert the document to that charset, and re-parse the
document from the beginning.

Member Data Documentation

◆ CHARSET_RE

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
static

◆ declaredHTMLEncoding

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.declaredHTMLEncoding

◆ NESTABLE_BLOCK_TAGS

tuple webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
static

◆ NESTABLE_INLINE_TAGS

tuple webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NESTABLE_INLINE_TAGS
static
Initial value:
= ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
'center')

◆ NESTABLE_LIST_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NESTABLE_LIST_TAGS
static
Initial value:
= { 'ol' : [],
'ul' : [],
'li' : ['ul', 'ol'],
'dl' : [],
'dd' : ['dl'],
'dt' : ['dl'] }

◆ NESTABLE_TABLE_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NESTABLE_TABLE_TAGS
static
Initial value:
= {'table' : [],
'tr' : ['table', 'tbody', 'tfoot', 'thead'],
'td' : ['tr'],
'th' : ['tr'],
'thead' : ['table'],
'tbody' : ['table'],
'tfoot' : ['table'],
}

◆ NESTABLE_TAGS

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NESTABLE_TAGS
static
Initial value:
= buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)

◆ NON_NESTABLE_BLOCK_TAGS

tuple webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
static

◆ originalEncoding

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.originalEncoding

◆ PRESERVE_WHITESPACE_TAGS

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
static

◆ QUOTE_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.QUOTE_TAGS = {'script' : None, 'textarea' : None}
static

◆ RESET_NESTING_TAGS

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.RESET_NESTING_TAGS
static
Initial value:
= buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
NON_NESTABLE_BLOCK_TAGS,
NESTABLE_LIST_TAGS,
NESTABLE_TABLE_TAGS)

◆ SELF_CLOSING_TAGS

webkitpy.thirdparty.BeautifulSoup.BeautifulSoup.SELF_CLOSING_TAGS
static
Initial value:
= buildTagMap(None,
('br' , 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base', 'col'))

The documentation for this class was generated from the following file: