webkit
2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
|
Public Member Functions | |
def | __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False) |
def | convert_charref (self, name) |
def | __getattr__ (self, methodName) |
def | isSelfClosingTag (self, name) |
def | reset (self) |
def | popTag (self) |
def | pushTag (self, tag) |
def | endData (self, containerClass=NavigableString) |
def | unknown_starttag (self, name, attrs, selfClosing=0) |
def | unknown_endtag (self, name) |
def | handle_data (self, data) |
def | handle_pi (self, text) |
def | handle_comment (self, text) |
def | handle_charref (self, ref) |
def | handle_entityref (self, ref) |
def | handle_decl (self, data) |
def | parse_declaration (self, i) |
Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.Tag | |
def | __init__ (self, parser, name, attrs=None, parent=None, previous=None) |
def | getString (self) |
def | setString (self, string) |
def | getText (self, separator=u"") |
def | get (self, key, default=None) |
def | clear (self) |
def | index (self, element) |
def | has_key (self, key) |
def | __getitem__ (self, key) |
def | __iter__ (self) |
def | __len__ (self) |
def | __contains__ (self, x) |
def | __nonzero__ (self) |
def | __setitem__ (self, key, value) |
def | __delitem__ (self, key) |
def | __call__ (self, *args, **kwargs) |
def | __getattr__ (self, tag) |
def | __eq__ (self, other) |
def | __ne__ (self, other) |
def | __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING) |
def | __unicode__ (self) |
def | __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
def | decompose (self) |
def | prettify (self, encoding=DEFAULT_OUTPUT_ENCODING) |
def | renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
def | find (self, name=None, attrs={}, recursive=True, text=None, **kwargs) |
def | findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) |
def | fetchText (self, text=None, recursive=True, limit=None) |
def | firstText (self, text=None, recursive=True) |
def | childGenerator (self) |
def | recursiveChildGenerator (self) |
Public Member Functions inherited from webkitpy.thirdparty.BeautifulSoup.PageElement | |
def | setup (self, parent=None, previous=None) |
def | replaceWith (self, replaceWith) |
def | replaceWithChildren (self) |
def | extract (self) |
def | insert (self, position, newChild) |
def | append (self, tag) |
def | findNext (self, name=None, attrs={}, text=None, **kwargs) |
def | findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findNextSibling (self, name=None, attrs={}, text=None, **kwargs) |
def | findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findPrevious (self, name=None, attrs={}, text=None, **kwargs) |
def | findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs) |
def | findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
def | findParent (self, name=None, attrs={}, **kwargs) |
def | findParents (self, name=None, attrs={}, limit=None, **kwargs) |
def | nextGenerator (self) |
def | nextSiblingGenerator (self) |
def | previousGenerator (self) |
def | previousSiblingGenerator (self) |
def | parentGenerator (self) |
def | substituteEncoding (self, str, encoding=None) |
def | toEncoding (self, s, encoding=None) |
Additional Inherited Members | |
Properties inherited from webkitpy.thirdparty.BeautifulSoup.Tag | |
string = property(getString, setString) | |
text = property(getText) | |
This class contains the basic parser and search code. It defines a parser that knows nothing about tag behavior except for the following: You can't close a tag without closing all the tags it encloses. That is, "<foo><bar></foo>" actually means "<foo><bar></bar></foo>". [Another possible explanation is "<foo><bar /></foo>", but since this class defines no SELF_CLOSING_TAGS, it will never use that explanation.] This class is useful for parsing XML or made-up markup languages, or when BeautifulSoup makes an assumption counter to what you were expecting.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.__init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False)
The Soup object is initialized as the 'root tag', and the provided markup (which can be a string or a file-like object) is fed into the underlying parser.

sgmllib will process most bad HTML, and the BeautifulSoup class has some tricks for dealing with some HTML that kills sgmllib, but Beautiful Soup can nonetheless choke or lose data if your data uses self-closing tags or declarations incorrectly.

By default, Beautiful Soup uses regexes to sanitize input, avoiding the vast majority of these problems. If the problems don't apply to you, pass in False for markupMassage, and you'll get better performance.

The default parser massage techniques fix the two most common instances of invalid HTML that choke sgmllib:

- <br/> (No space between name of closing tag and tag close)
- <! --Comment--> (Extraneous whitespace in declaration)

You can pass in a custom list of (RE object, replace method) tuples to get Beautiful Soup to scrub your input the way you want.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.__getattr__ | ( | self, | |
methodName | |||
) |
This method routes method call requests to either the SGMLParser superclass or the Tag superclass, depending on the method name.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convert_charref | ( | self, | |
name | |||
) |
This method fixes a bug in Python's SGMLParser.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.endData (self, containerClass=NavigableString)
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_charref | ( | self, | |
ref | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_comment | ( | self, | |
text | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_data | ( | self, | |
data | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_decl | ( | self, | |
data | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_entityref | ( | self, | |
ref | |||
) |
Handle entity references as data, possibly converting known HTML and/or XML entity references to the corresponding Unicode characters.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.handle_pi | ( | self, | |
text | |||
) |
Handle a processing instruction as a ProcessingInstruction object, possibly one with a %SOUP-ENCODING% slot into which an encoding will be plugged later.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.isSelfClosingTag | ( | self, | |
name | |||
) |
Returns true iff the given string is the name of a self-closing tag according to this parser.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.parse_declaration | ( | self, | |
i | |||
) |
Treat a bogus SGML declaration as raw data. Treat a CDATA declaration as a CData object.
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.popTag | ( | self | ) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.pushTag | ( | self, | |
tag | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.reset | ( | self | ) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.unknown_endtag | ( | self, | |
name | |||
) |
def webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.unknown_starttag (self, name, attrs, selfClosing=0)
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertEntities |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertHTMLEntities |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.convertXMLEntities |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.currentData |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.currentTag |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.declaredHTMLEncoding |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.escapeUnrecognizedEntities |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.fromEncoding |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.hidden |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.instanceSelfClosingTags |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.literal |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.markup |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.markupMassage |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.originalEncoding |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.parseOnlyThese |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.previous |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.quoteStack |
|
static |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.smartQuotesTo |
|
static |
webkitpy.thirdparty.BeautifulSoup.BeautifulStoneSoup.tagStack |
|
static |
|
static |