#!/usr/bin/env python
"""
Simple parser for Minimal XML[1] (aka Simple Markup Language).

Author: Magnus Lie Hetland

History:

1.1 (2001-09-28) Minor bugfix
1.0 (2001-05-20) Initial version

[1] http://www.docuverse.com/smldev/minxml.html
"""

import re
import sys


def parse(text):
    """Parse a Minimal XML document and return its element tree.

    The text is preprocessed in three steps before the tree is built:

    1. Decimal character references (``&#NNN;``) are replaced by the
       corresponding character.
    2. Every run of whitespace is collapsed into a single space.
    3. The text is split into tag tokens (``<...>``) and text tokens;
       tokens that are empty or whitespace-only are discarded.

    Returns:
        A ``(tag, kids)`` tuple for the root element, in the format
        described in ``tree``.

    Raises:
        IndexError: if the input is empty or ends before the root element
            is closed.
    """
    # NOTE(review): references are expanded *before* tokenizing, so an
    # escaped "<" (&#60;) ends up being treated as markup, and an escaped
    # whitespace character is collapsed like any other whitespace.
    text = re.sub(r'&#(\d+);', lambda m: chr(int(m.group(1))), text)
    text = re.sub(r'\s+', ' ', text)
    # The capturing group makes re.split keep the tags in the result.
    pieces = re.split(r'(<[^>]+>)', text)
    tokens = [p.strip() for p in pieces if p.strip()]
    # tree() consumes tokens with pop(), i.e. from the end of the list.
    tokens.reverse()
    # Tokens left over after the root element is closed are ignored.
    return tree(tokens)


def _pop(tokens, expecting):
    """Pop the next token, raising a descriptive IndexError if none is left."""
    if not tokens:
        raise IndexError(
            'unexpected end of input while expecting ' + expecting)
    return tokens.pop()


def tree(tokens, tag=None):
    """Build the tree for one element from a reversed token list.

    Args:
        tokens: list of tokens in reverse document order (the next token
            is the last item). The list is consumed in place.
        tag: name of the element whose opening tag has already been
            consumed. If false (the default), the next token is taken to
            be the opening tag and its name is used.

    Returns:
        ``(tag, kids)`` where ``kids`` is a list whose items are either
        ``(None, text)`` for character data or a nested ``(tag, kids)``
        tuple for a child element.

    Raises:
        IndexError: if the tokens run out before a closing tag is found.
    """
    if not tag:
        # Strip the surrounding "<" and ">" to get the element name.
        tag = _pop(tokens, 'an opening tag')[1:-1]
    kids = []
    while True:
        tok = _pop(tokens, 'the closing tag of <%s>' % tag)
        # A closing tag starts with "</". Looking only at the second
        # character would mistake text such as "1/2" for a closing tag.
        # The closing tag's name is not compared with `tag`.
        if tok.startswith('</'):
            break
        if tok.startswith('<'):
            kids.append(tree(tokens, tok[1:-1]))
        else:
            kids.append((None, tok))
    return (tag, kids)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit('usage: %s XML_TEXT' % sys.argv[0])
    # Parenthesized single-argument form works as both the Python 2
    # print statement and the Python 3 print function.
    print(parse(sys.argv[1]))