Index: python/src/html5lib/liberalxmlparser.py
===================================================================
--- python/src/html5lib/liberalxmlparser.py (revision 1058)
+++ python/src/html5lib/liberalxmlparser.py (working copy)
@@ -20,6 +20,9 @@
from xml.dom import XHTML_NAMESPACE
from xml.sax.saxutils import unescape
+import odict # From http://pypi.python.org/pypi/Ordered%20Dictionary/
+
+
class XMLParser(html5parser.HTMLParser):
""" liberal XML parser """
@@ -31,7 +34,7 @@
def normalizeToken(self, token):
if token["type"] in ("StartTag", "EmptyTag"):
- token["data"] = dict(token["data"][::-1])
+ token["data"] = odict.OrderedDict(token["data"])
# For EmptyTags, process both a Start and an End tag
if token["type"] == "EmptyTag":
Index: python/src/html5lib/serializer/htmlserializer.py
===================================================================
--- python/src/html5lib/serializer/htmlserializer.py (revision 1058)
+++ python/src/html5lib/serializer/htmlserializer.py (working copy)
@@ -58,6 +58,7 @@
quote_char = '"'
use_best_quote_char = True
minimize_boolean_attributes = True
+ preserve_attr_order = False
use_trailing_solidus = False
space_before_trailing_solidus = True
@@ -70,7 +71,8 @@
omit_optional_tags = True
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
- "minimize_boolean_attributes", "use_trailing_solidus",
+ "minimize_boolean_attributes", "preserve_attr_order",
+ "use_trailing_solidus",
"space_before_trailing_solidus", "omit_optional_tags",
"strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
"escape_rcdata", 'use_trailing_solidus', "sanitize")
@@ -128,10 +130,17 @@
in_cdata = True
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
+ # Attribute order can only be preserved here when 'attrs' is
+ # not already a dict. Is the check:
+ # hasattr(attrs, "items")
+ # below just cruft, or are there still cases where it can
+ # be true?
attrs = token["data"]
if hasattr(attrs, "items"):
attrs = attrs.items()
- attrs.sort()
+ attrs.sort()
+ elif not self.preserve_attr_order:
+ attrs.sort()
attributes = []
for k,v in attrs:
if encoding:
Index: python/src/html5lib/html5parser.py
===================================================================
--- python/src/html5lib/html5parser.py (revision 1058)
+++ python/src/html5lib/html5parser.py (working copy)
@@ -28,6 +28,9 @@
from constants import headingElements, tableInsertModeElements
from constants import cdataElements, rcdataElements, voidElements
+import odict # From http://pypi.python.org/pypi/Ordered%20Dictionary/
+
+
class HTMLParser(object):
"""HTML parser. Generates a tree structure from a stream of (possibly
malformed) HTML"""
@@ -183,7 +186,7 @@
token["type"] = "StartTag"
if token["type"] == "StartTag":
- token["data"] = dict(token["data"][::-1])
+ token["data"] = odict.OrderedDict(token["data"])
return token