"""sax_helper.py

Provides functions that help deal with SAX events.

Classes:
EventSaver       A ContentHandler that saves events into a list.

Functions:
make_generator    Make an XMLGenerator object from a writeable filehandle.
apply_events      Apply a list of events to a content handler.
write_simple_element  Write an element wrapping a list of events.

munge_characters  Concatenate consecutive 'characters' in a list of events.

xml2sax           Convert an XML string into sax events.
sax2xml           Convert a list of sax events into an XML string.

"""
import sys
from xml import sax
from xml.sax import handler
from xml.sax import saxutils
try:
    import cStringIO as StringIO
except ImportError:
    import StringIO

class EventSaver:
    """This is a ContentHandler that saves all the method calls into a
    list of events.

    Any attribute that is not otherwise defined on the instance is
    returned as a callable; calling it appends a tuple of (attribute
    name, positional args) to events.  Names of the form __xxx__ are
    never recorded: looking one up raises AttributeError, so protocol
    probes such as hasattr(obj, '__deepcopy__') get an honest answer.

    Members:
    events   List of tuples (name of event, args passed to function).

    """
    class _Saver:
        # This is a helper class that saves whatever it receives to a list.
        def __init__(self, name, events):
            self.name = name
            self.events = events

        def __call__(self, *args):
            self.events.append((self.name, args))

    def __init__(self):
        """EventSaver() -> instance"""
        self.events = []

    def __getattr__(self, attr):
        """Return a callable that records calls made under the name attr.

        Raises AttributeError for __xxx__ names and for 'events' itself.

        """
        # __getattr__ is only reached when normal lookup has failed.  If
        # 'events' gets here the instance was created without __init__
        # (e.g. by copy or pickle); falling through to self.events below
        # would recurse without end.
        if attr == "events":
            raise AttributeError(attr)
        # Special-method probes must not be answered with a recorder,
        # otherwise the caller would invoke it and get None back.
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        return self._Saver(attr, self.events)

def make_generator(handle=None):
    """make_generator([handle]) -> XMLGenerator

    Wrap a filehandle that is open for writing in an XMLGenerator.
    When handle is omitted (or None), sys.stdout is used instead.

    """
    target = sys.stdout if handle is None else handle
    return saxutils.XMLGenerator(target)

def write_simple_element(content_handler, tag, events):
    """write_simple_element(content_handler, tag, events)

    Convenience function that emits one element to content_handler:
    a start tag named tag (with no attributes), then the SAX events
    in the list events, then the matching end tag, and finally a
    newline as character data.

    """
    no_attributes = {}
    content_handler.startElement(tag, no_attributes)
    # Everything in events ends up between the start and end tags.
    apply_events(events, content_handler)
    content_handler.endElement(tag)
    line_break = '\n'
    content_handler.characters(line_break)

def apply_events(events, content_handler):
    """apply_events(events, content_handler)

    Apply the list of events to a content_handler.  Each event is a
    tuple of (method name, args); the named method is looked up on
    content_handler and called with args as its positional arguments,
    in the order the events appear in the list.

    """
    for fn_name, args in events:
        fn = getattr(content_handler, fn_name)
        # Argument unpacking replaces the deprecated apply() builtin.
        fn(*args)

_XML2SAX_MAGIC_ENTITY = "XML2SAX_MAGIC_ENTITY"
def xml2sax(xmlstr):
    """xml2sax(xmlstr) -> SAX events

    Parse xmlstr and return the list of events recorded by an
    EventSaver, each a tuple of (event name, args).

    xmlstr is wrapped in a dummy element before it is fed to the
    parser.  The startElement and endElement events belonging to
    that wrapper are removed from the list before it is returned.

    Raises ValueError if the wrapper's start or end event cannot be
    found among the recorded events.

    """
    parser = sax.make_parser()
    saver = EventSaver()
    parser.setContentHandler(saver)
    parser.setErrorHandler(handler.ErrorHandler())
    # Supply a dummy document element, so that the text handed to the
    # parser always has a single enclosing element.
    magic = _XML2SAX_MAGIC_ENTITY    # for convenience
    parser.feed("<%s>%s</%s>" % (magic, xmlstr, magic))
    parser.close()

    # Now get rid of the magic entity.
    # Look for the start one from the beginning, the end one at the end.
    events = saver.events
    for i, (name, args) in enumerate(events):
        if name == "startElement" and args[0] == magic:
            # Safe to delete during iteration: we stop immediately.
            del events[i]
            break
    else:
        raise ValueError("I could not find magic start entity '%s'" % magic)
    for i in range(len(events) - 1, -1, -1):
        name, args = events[i]
        if name == "endElement" and args[0] == magic:
            del events[i]
            break
    else:
        raise ValueError("I could not find magic end entity '%s'" % magic)
    return events

def sax2xml(events):
    """sax2xml(events) -> str

    Replay a list of (event name, args) tuples against an XMLGenerator
    that writes to an in-memory buffer, and return everything that was
    written to that buffer.

    """
    buf = StringIO.StringIO()
    writer = make_generator(buf)
    for name, params in events:
        method = getattr(writer, name)
        method(*params)
    return buf.getvalue()
    

def munge_characters(events):
    """munge_characters(events) -> events

    Return a new list of events in which every run of consecutive
    'characters' events has been collapsed into a single 'characters'
    event carrying the concatenated text.  All other events are
    passed through unchanged and in order.  The input list is not
    modified.

    """
    # A parser that reads its input incrementally may deliver one piece
    # of text as several 'characters' events; glue neighbours together.
    merged = []
    for name, params in events:
        follows_text = (
            merged
            and name == 'characters'
            and merged[-1][0] == 'characters'
        )
        if not follows_text:
            # Nothing to join with; keep the event as it is.
            merged.append((name, params))
            continue
        # Only the first argument (the text) is concatenated; any
        # further arguments are taken from the later event.
        prev_name, prev_params = merged[-1]
        joined = (prev_params[0] + params[0],) + params[1:]
        merged[-1] = (prev_name, joined)
    return merged

