webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup Class Reference
Inheritance diagram for webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup:
webkitpy.thirdparty.BeautifulSoup.Tag webkitpy.thirdparty.BeautifulSoup.PageElement webkitpy.thirdparty.BeautifulSoup.BeautifulSOAP webkitpy.thirdparty.BeautifulSoup.BeautifulSoup webkitpy.thirdparty.BeautifulSoup.RobustXMLParser webkitpy.thirdparty.BeautifulSoup.SimplifyingSOAPParser webkitpy.thirdparty.BeautifulSoup.ICantBelieveItsBeautifulSoup webkitpy.thirdparty.BeautifulSoup.MinimalSoup webkitpy.thirdparty.BeautifulSoup.RobustHTMLParser webkitpy.thirdparty.BeautifulSoup.RobustWackAssHTMLParser webkitpy.thirdparty.BeautifulSoup.RobustInsanelyWackAssHTMLParser

Public Member Functions

def __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False)
 
def convert_charref (self, name)
 
def __getattr__ (self, methodName)
 
def isSelfClosingTag (self, name)
 
def reset (self)
 
def popTag (self)
 
def pushTag (self, tag)
 
def endData (self, containerClass=NavigableString)
 
def unknown_starttag (self, name, attrs, selfClosing=0)
 
def unknown_endtag (self, name)
 
def handle_data (self, data)
 
def handle_pi (self, text)
 
def handle_comment (self, text)
 
def handle_charref (self, ref)
 
def handle_entityref (self, ref)
 
def handle_decl (self, data)
 
def parse_declaration (self, i)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.Tag
def __init__ (self, parser, name, attrs=None, parent=None, previous=None)
 
def getString (self)
 
def setString (self, string)
 
def getText (self, separator=u"")
 
def get (self, key, default=None)
 
def clear (self)
 
def index (self, element)
 
def has_key (self, key)
 
def __getitem__ (self, key)
 
def __iter__ (self)
 
def __len__ (self)
 
def __contains__ (self, x)
 
def __nonzero__ (self)
 
def __setitem__ (self, key, value)
 
def __delitem__ (self, key)
 
def __call__ (self, *args, **kwargs)
 
def __getattr__ (self, tag)
 
def __eq__ (self, other)
 
def __ne__ (self, other)
 
def __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def __unicode__ (self)
 
def __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def decompose (self)
 
def prettify (self, encoding=DEFAULT_OUTPUT_ENCODING)
 
def renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0)
 
def find (self, name=None, attrs={}, recursive=True, text=None, **kwargs)
 
def findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs)
 
def fetchText (self, text=None, recursive=True, limit=None)
 
def firstText (self, text=None, recursive=True)
 
def childGenerator (self)
 
def recursiveChildGenerator (self)
 
- Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def setup (self, parent=None, previous=None)
 
def replaceWith (self, replaceWith)
 
def replaceWithChildren (self)
 
def extract (self)
 
def insert (self, position, newChild)
 
def append (self, tag)
 
def findNext (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findNextSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPrevious (self, name=None, attrs={}, text=None, **kwargs)
 
def findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs)
 
def findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs)
 
def findParent (self, name=None, attrs={}, **kwargs)
 
def findParents (self, name=None, attrs={}, limit=None, **kwargs)
 
def nextGenerator (self)
 
def nextSiblingGenerator (self)
 
def previousGenerator (self)
 
def previousSiblingGenerator (self)
 
def parentGenerator (self)
 
def substituteEncoding (self, str, encoding=None)
 
def toEncoding (self, s, encoding=None)
 

Public Attributes

 parseOnlyThese
 
 fromEncoding
 
 smartQuotesTo
 
 convertEntities
 
 convertXMLEntities
 
 convertHTMLEntities
 
 escapeUnrecognizedEntities
 
 instanceSelfClosingTags
 
 markup
 
 markupMassage
 
 originalEncoding
 
 declaredHTMLEncoding
 
 hidden
 
 currentData
 
 currentTag
 
 tagStack
 
 quoteStack
 
 previous
 
 literal
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 parserClass
 
 isSelfClosing
 
 name
 
 attrs
 
 contents
 
 hidden
 
 containsSubstitutions
 
 convertHTMLEntities
 
 convertXMLEntities
 
 escapeUnrecognizedEntities
 
 attrMap
 
- Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
 parent
 
 previous
 
 next
 
 previousSibling
 
 nextSibling
 

Static Public Attributes

dictionary SELF_CLOSING_TAGS = {}
 
dictionary NESTABLE_TAGS = {}
 
dictionary RESET_NESTING_TAGS = {}
 
dictionary QUOTE_TAGS = {}
 
list PRESERVE_WHITESPACE_TAGS = []
 
list MARKUP_MASSAGE
 
string ROOT_TAG_NAME = u'[document]'
 
string HTML_ENTITIES = "html"
 
string XML_ENTITIES = "xml"
 
string XHTML_ENTITIES = "xhtml"
 
string ALL_ENTITIES = XHTML_ENTITIES
 
dictionary STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.Tag
dictionary XML_ENTITIES_TO_SPECIAL_CHARS
 
def XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
 
 BARE_AMPERSAND_OR_BRACKET
 
def findChild = find
 
def findChildren = findAll
 
def first = find
 
def fetch = findAll
 
- Static Public Attributes inherited from webkitpy.thirdparty.BeautifulSoup.PageElement
def fetchNextSiblings = findNextSiblings
 
def fetchPrevious = findAllPrevious
 
def fetchPreviousSiblings = findPreviousSiblings
 
def fetchParents = findParents
 

Additional Inherited Members

- Properties inherited from webkitpy.thirdparty.BeautifulSoup.Tag
 string = property(getString, setString)
 
 text = property(getText)
 

Detailed Description

This class contains the basic parser and search code. It defines
a parser that knows nothing about tag behavior except for the
following:

  You can't close a tag without closing all the tags it encloses.
  That is, "<foo><bar></foo>" actually means
  "<foo><bar></bar></foo>".

[Another possible explanation is "<foo><bar /></foo>", but since
this class defines no SELF_CLOSING_TAGS, it will never use that
explanation.]

This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
expecting.

Constructor & Destructor Documentation

◆ __init__()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.__init__ (   self,
  markup = "",
  parseOnlyThese = None,
  fromEncoding = None,
  markupMassage = True,
  smartQuotesTo = XML_ENTITIES,
  convertEntities = None,
  selfClosingTags = None,
  isHTML = False 
)
The Soup object is initialized as the 'root tag', and the
provided markup (which can be a string or a file-like object)
is fed into the underlying parser.

sgmllib will process most bad HTML, and the BeautifulSoup
class has some tricks for dealing with some HTML that kills
sgmllib, but Beautiful Soup can nonetheless choke or lose data
if your data uses self-closing tags or declarations
incorrectly.

By default, Beautiful Soup uses regexes to sanitize input,
avoiding the vast majority of these problems. If the problems
don't apply to you, pass in False for markupMassage, and
you'll get better performance.

The default parser massage techniques fix the two most common
instances of invalid HTML that choke sgmllib:

 <br/> (No space between name of closing tag and tag close)
 <! --Comment--> (Extraneous whitespace in declaration)

You can pass in a custom list of (RE object, replace method)
tuples to get Beautiful Soup to scrub your input the way you
want.

Member Function Documentation

◆ __getattr__()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.__getattr__ (   self,
  methodName 
)
This method routes method call requests to either the SGMLParser
superclass or the Tag superclass, depending on the method name.

◆ convert_charref()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convert_charref (   self,
  name 
)
This method fixes a bug in Python's SGMLParser.

◆ endData()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.endData (   self,
  containerClass = NavigableString 
)

◆ handle_charref()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_charref (   self,
  ref 
)

◆ handle_comment()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_comment (   self,
  text 
)

◆ handle_data()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_data (   self,
  data 
)

◆ handle_decl()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_decl (   self,
  data 
)

◆ handle_entityref()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_entityref (   self,
  ref 
)
Handle entity references as data, possibly converting known
HTML and/or XML entity references to the corresponding Unicode
characters.

◆ handle_pi()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_pi (   self,
  text 
)
Handle a processing instruction as a ProcessingInstruction
object, possibly one with a %SOUP-ENCODING% slot into which an
encoding will be plugged later.

◆ isSelfClosingTag()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.isSelfClosingTag (   self,
  name 
)
Returns true iff the given string is the name of a
self-closing tag according to this parser.

◆ parse_declaration()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.parse_declaration (   self,
  i 
)
Treat a bogus SGML declaration as raw data. Treat a CDATA
declaration as a CData object.

◆ popTag()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.popTag (   self)

◆ pushTag()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.pushTag (   self,
  tag 
)

◆ reset()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.reset (   self)

◆ unknown_endtag()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.unknown_endtag (   self,
  name 
)

◆ unknown_starttag()

def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.unknown_starttag (   self,
  name,
  attrs,
  selfClosing = 0 
)

Member Data Documentation

◆ ALL_ENTITIES

string webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.ALL_ENTITIES = XHTML_ENTITIES
static

◆ convertEntities

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertEntities

◆ convertHTMLEntities

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertHTMLEntities

◆ convertXMLEntities

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertXMLEntities

◆ currentData

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.currentData

◆ currentTag

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.currentTag

◆ declaredHTMLEncoding

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.declaredHTMLEncoding

◆ escapeUnrecognizedEntities

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.escapeUnrecognizedEntities

◆ fromEncoding

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.fromEncoding

◆ hidden

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.hidden

◆ HTML_ENTITIES

string webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.HTML_ENTITIES = "html"
static

◆ instanceSelfClosingTags

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.instanceSelfClosingTags

◆ literal

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.literal

◆ markup

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.markup

◆ MARKUP_MASSAGE

list webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.MARKUP_MASSAGE
static
Initial value:
= [(re.compile('(<[^<>]*)/>'),
lambda x: x.group(1) + ' />'),
(re.compile('<!\s+([^<>]*)>'),
lambda x: '<!' + x.group(1) + '>')
]

◆ markupMassage

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.markupMassage

◆ NESTABLE_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.NESTABLE_TAGS = {}
static

◆ originalEncoding

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.originalEncoding

◆ parseOnlyThese

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.parseOnlyThese

◆ PRESERVE_WHITESPACE_TAGS

list webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.PRESERVE_WHITESPACE_TAGS = []
static

◆ previous

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.previous

◆ QUOTE_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.QUOTE_TAGS = {}
static

◆ quoteStack

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.quoteStack

◆ RESET_NESTING_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.RESET_NESTING_TAGS = {}
static

◆ ROOT_TAG_NAME

string webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.ROOT_TAG_NAME = u'[document]'
static

◆ SELF_CLOSING_TAGS

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.SELF_CLOSING_TAGS = {}
static

◆ smartQuotesTo

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.smartQuotesTo

◆ STRIP_ASCII_SPACES

dictionary webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
static

◆ tagStack

webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.tagStack

◆ XHTML_ENTITIES

string webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.XHTML_ENTITIES = "xhtml"
static

◆ XML_ENTITIES

string webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES = "xml"
static

The documentation for this class was generated from the following file: