Index: python/src/html5lib/liberalxmlparser.py
===================================================================
--- python/src/html5lib/liberalxmlparser.py (revision 1058)
+++ python/src/html5lib/liberalxmlparser.py (working copy)
@@ -20,6 +20,9 @@
 from xml.dom import XHTML_NAMESPACE
 from xml.sax.saxutils import unescape
 
+import odict # From http://pypi.python.org/pypi/Ordered%20Dictionary/
+
+
 class XMLParser(html5parser.HTMLParser):
     """ liberal XML parser """
 
@@ -31,7 +34,13 @@
     def normalizeToken(self, token):
 
         if token["type"] in ("StartTag", "EmptyTag"):
-            token["data"] = dict(token["data"][::-1])
+            # Keep the attributes in source order, but let the first
+            # occurrence of a duplicated name win, as dict(data[::-1]) did.
+            attrs = odict.OrderedDict()
+            for name, value in token["data"]:
+                if name not in attrs:
+                    attrs[name] = value
+            token["data"] = attrs
 
             # For EmptyTags, process both a Start and an End tag
             if token["type"] == "EmptyTag":
Index: python/src/html5lib/serializer/htmlserializer.py
===================================================================
--- python/src/html5lib/serializer/htmlserializer.py (revision 1058)
+++ python/src/html5lib/serializer/htmlserializer.py (working copy)
@@ -58,6 +58,7 @@
     quote_char = '"'
     use_best_quote_char = True
     minimize_boolean_attributes = True
+    preserve_attr_order = False
 
     use_trailing_solidus = False
     space_before_trailing_solidus = True
@@ -70,7 +71,8 @@
     omit_optional_tags = True
 
     options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-          "minimize_boolean_attributes", "use_trailing_solidus",
+          "minimize_boolean_attributes", "preserve_attr_order",
+          "use_trailing_solidus",
           "space_before_trailing_solidus", "omit_optional_tags",
           "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
           "escape_rcdata", 'use_trailing_solidus', "sanitize")
@@ -128,10 +130,14 @@
                     in_cdata = True
                 elif in_cdata:
                     self.serializeError(_("Unexpected child element of a CDATA element"))
                 attrs = token["data"]
+                # A plain dict carries no meaningful order, so it is always
+                # sorted; anything else is sorted unless the caller asked
+                # for the incoming attribute order to be preserved.
+                unordered = type(attrs) is dict
                 if hasattr(attrs, "items"):
                     attrs = attrs.items()
-                attrs.sort()
+                if unordered or not self.preserve_attr_order:
+                    attrs.sort()
                 attributes = []
                 for k,v in attrs:
                     if encoding:
Index: python/src/html5lib/html5parser.py
===================================================================
--- python/src/html5lib/html5parser.py (revision 1058)
+++ python/src/html5lib/html5parser.py (working copy)
@@ -28,6 +28,9 @@
 from constants import headingElements, tableInsertModeElements
 from constants import cdataElements, rcdataElements, voidElements
 
+import odict # From http://pypi.python.org/pypi/Ordered%20Dictionary/
+
+
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
     malformed) HTML"""
@@ -183,7 +186,13 @@
             token["type"] = "StartTag"
 
         if token["type"] == "StartTag":
-            token["data"] = dict(token["data"][::-1])
+            # Keep the attributes in source order, but let the first
+            # occurrence of a duplicated name win, as dict(data[::-1]) did.
+            attrs = odict.OrderedDict()
+            for name, value in token["data"]:
+                if name not in attrs:
+                    attrs[name] = value
+            token["data"] = attrs
 
         return token
 