From 65347b5993fd3ba35c5c2078f961ccb88e9c04cb Mon Sep 17 00:00:00 2001
From: Bob Lannon <blannon@gmail.com>
Date: Mon, 15 Apr 2013 17:16:59 -0400
Subject: [PATCH] added XMLSource, which depends on xmltodict

---
 saucebrush/sources.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/saucebrush/sources.py b/saucebrush/sources.py
index 92844a7..04e346e 100644
--- a/saucebrush/sources.py
+++ b/saucebrush/sources.py
@@ -270,3 +270,38 @@ class JSONSource(FileSource):
                 yield record
         else:
             yield obj
+
+class XMLSource(FileSource):
+    """ Source for reading records from XML files (parsed with xmltodict).
+
+        ``node_list`` is an optional dotted path of element names (for
+        example ``"root.items.item"``) that is walked down from the top of
+        the parsed document before any records are yielded. If the object
+        reached that way is a list, each member is yielded separately;
+        otherwise the object itself is yielded as a single record. A path
+        element that is not a key of the current mapping raises KeyError.
+    """
+
+    def __init__(self, input, node_list=None):
+        super(XMLSource, self).__init__(input)
+        # node_list is optional: only split it when a path was given, so
+        # the default of None no longer fails with AttributeError.
+        self.node_list = node_list.split('.') if node_list else []
+
+    def _process_file(self, f):
+        """ Parse ``f`` with xmltodict and yield the selected record(s). """
+        # Local import: xmltodict is only needed once a file is processed.
+        import xmltodict
+
+        obj = xmltodict.parse(f)
+
+        # Walk down the tree along the requested path (no-op if none given).
+        for node in self.node_list:
+            obj = obj[node]
+
+        # A list at the selected node means one record per element.
+        if isinstance(obj, list):
+            for record in obj:
+                yield record
+        else:
+            yield obj