Skip to content Skip to sidebar Skip to footer

Lxml - Is There Any Hacky Way To Keep &quot;?

I noticed that XML entities such as &quot; are automatically converted back to their original characters when the document is parsed: >>> from lxml import etree as et >>> parser = et.XMLPa

Solution 1:

from xml.sax.saxutils import escape
from lxml import etree

def to_string(xdoc):
    """Serialize a parsed lxml tree to a string, keeping quotes as entities.

    Walks ``xdoc`` with ``etree.iterwalk`` and rebuilds the markup by hand so
    that ``'`` and ``"`` in character data are written as ``&apos;`` and
    ``&quot;`` instead of as literal characters.

    Args:
        xdoc: The element (or tree) accepted by ``etree.iterwalk``.

    Returns:
        The serialized markup as a ``str``. Each closing tag is followed by
        the element's tail text, or by a newline when there is no tail.

    Note:
        Tags and attribute names are written verbatim from ``elem.tag`` and
        ``elem.attrib``; no namespace-prefix handling is performed here.
    """
    # Extra replacements applied on top of the ``&``, ``<`` and ``>`` that
    # ``escape`` always handles.
    quote_entities = {"'": "&apos;", '"': "&quot;"}

    # Collect fragments and join once instead of growing a string with ``+=``.
    parts = []
    for action, elem in etree.iterwalk(xdoc, events=("start", "end")):
        if action == "start":
            text = escape(elem.text, quote_entities) if elem.text is not None else ""
            # Attribute values are wrapped in double quotes, so they must be
            # escaped too or a value containing ``"``, ``&`` or ``<`` would
            # yield malformed markup.
            attrs = "".join(
                ' %s="%s"' % (name, escape(value, quote_entities))
                for name, value in elem.attrib.items()
            )
            parts.append("<%s%s>%s" % (elem.tag, attrs, text))
        elif action == "end":
            # Tail text is character data as well; escape it the same way as
            # ``elem.text``. A missing/empty tail becomes a newline.
            tail = escape(elem.tail, quote_entities) if elem.tail else "\n"
            parts.append("</%s>%s" % (elem.tag, tail))
    return "".join(parts)
# Example usage: parse the XML source into an element, then serialize it with
# to_string(). NOTE: ``xml_text`` is not defined in this snippet; it must hold
# the XML document to parse before these lines run.
xdoc = etree.fromstring(xml_text)
s = to_string(xdoc)

Post a Comment for "Lxml - Is There Any Hacky Way To Keep &quot;?"