|
| def | __init__ (self, *args, **kwargs) |
| |
| def | start_meta (self, attrs) |
| |
| def | __init__ (self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, convertEntities=None, selfClosingTags=None, isHTML=False) |
| |
| def | convert_charref (self, name) |
| |
| def | __getattr__ (self, methodName) |
| |
| def | isSelfClosingTag (self, name) |
| |
| def | reset (self) |
| |
| def | popTag (self) |
| |
| def | pushTag (self, tag) |
| |
| def | endData (self, containerClass=NavigableString) |
| |
| def | unknown_starttag (self, name, attrs, selfClosing=0) |
| |
| def | unknown_endtag (self, name) |
| |
| def | handle_data (self, data) |
| |
| def | handle_pi (self, text) |
| |
| def | handle_comment (self, text) |
| |
| def | handle_charref (self, ref) |
| |
| def | handle_entityref (self, ref) |
| |
| def | handle_decl (self, data) |
| |
| def | parse_declaration (self, i) |
| |
| def | __init__ (self, parser, name, attrs=None, parent=None, previous=None) |
| |
| def | getString (self) |
| |
| def | setString (self, string) |
| |
| def | getText (self, separator=u"") |
| |
| def | get (self, key, default=None) |
| |
| def | clear (self) |
| |
| def | index (self, element) |
| |
| def | has_key (self, key) |
| |
| def | __getitem__ (self, key) |
| |
| def | __iter__ (self) |
| |
| def | __len__ (self) |
| |
| def | __contains__ (self, x) |
| |
| def | __nonzero__ (self) |
| |
| def | __setitem__ (self, key, value) |
| |
| def | __delitem__ (self, key) |
| |
| def | __call__ (self, *args, **kwargs) |
| |
| def | __getattr__ (self, tag) |
| |
| def | __eq__ (self, other) |
| |
| def | __ne__ (self, other) |
| |
| def | __repr__ (self, encoding=DEFAULT_OUTPUT_ENCODING) |
| |
| def | __unicode__ (self) |
| |
| def | __str__ (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
| |
| def | decompose (self) |
| |
| def | prettify (self, encoding=DEFAULT_OUTPUT_ENCODING) |
| |
| def | renderContents (self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0) |
| |
| def | find (self, name=None, attrs={}, recursive=True, text=None, **kwargs) |
| |
| def | findAll (self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) |
| |
| def | fetchText (self, text=None, recursive=True, limit=None) |
| |
| def | firstText (self, text=None, recursive=True) |
| |
| def | childGenerator (self) |
| |
| def | recursiveChildGenerator (self) |
| |
| def | setup (self, parent=None, previous=None) |
| |
| def | replaceWith (self, replaceWith) |
| |
| def | replaceWithChildren (self) |
| |
| def | extract (self) |
| |
| def | insert (self, position, newChild) |
| |
| def | append (self, tag) |
| |
| def | findNext (self, name=None, attrs={}, text=None, **kwargs) |
| |
| def | findAllNext (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
| |
| def | findNextSibling (self, name=None, attrs={}, text=None, **kwargs) |
| |
| def | findNextSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
| |
| def | findPrevious (self, name=None, attrs={}, text=None, **kwargs) |
| |
| def | findAllPrevious (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
| |
| def | findPreviousSibling (self, name=None, attrs={}, text=None, **kwargs) |
| |
| def | findPreviousSiblings (self, name=None, attrs={}, text=None, limit=None, **kwargs) |
| |
| def | findParent (self, name=None, attrs={}, **kwargs) |
| |
| def | findParents (self, name=None, attrs={}, limit=None, **kwargs) |
| |
| def | nextGenerator (self) |
| |
| def | nextSiblingGenerator (self) |
| |
| def | previousGenerator (self) |
| |
| def | previousSiblingGenerator (self) |
| |
| def | parentGenerator (self) |
| |
| def | substituteEncoding (self, str, encoding=None) |
| |
| def | toEncoding (self, s, encoding=None) |
| |
| | originalEncoding |
| |
| | declaredHTMLEncoding |
| |
| | parseOnlyThese |
| |
| | fromEncoding |
| |
| | smartQuotesTo |
| |
| | convertEntities |
| |
| | convertXMLEntities |
| |
| | convertHTMLEntities |
| |
| | escapeUnrecognizedEntities |
| |
| | instanceSelfClosingTags |
| |
| | markup |
| |
| | markupMassage |
| |
| | originalEncoding |
| |
| | declaredHTMLEncoding |
| |
| | hidden |
| |
| | currentData |
| |
| | currentTag |
| |
| | tagStack |
| |
| | quoteStack |
| |
| | previous |
| |
| | literal |
| |
| | parserClass |
| |
| | isSelfClosing |
| |
| | name |
| |
| | attrs |
| |
| | contents |
| |
| | hidden |
| |
| | containsSubstitutions |
| |
| | convertHTMLEntities |
| |
| | convertXMLEntities |
| |
| | escapeUnrecognizedEntities |
| |
| | attrMap |
| |
| | parent |
| |
| | previous |
| |
| | next |
| |
| | previousSibling |
| |
| | nextSibling |
| |
| | string = property(getString, setString) |
| |
| | text = property(getText) |
| |
The MinimalSoup class is for parsing HTML that contains
pathologically bad markup. It makes no assumptions about tag
nesting, but it does know which tags are self-closing, that
<script> tags contain JavaScript and should not be parsed, that
META tags may contain encoding information, and so on.
This also makes it better for subclassing than BeautifulStoneSoup
or BeautifulSoup.