added XMLSource, which depends on xmltodict
This commit is contained in:
parent
fe56a78963
commit
65347b5993
@ -270,3 +270,38 @@ class JSONSource(FileSource):
|
|||||||
yield record
|
yield record
|
||||||
else:
|
else:
|
||||||
yield obj
|
yield obj
|
||||||
|
|
||||||
|
class XMLSource(FileSource):
    """ Source for reading from XML files. Use with the same kind of caution
        that you use to approach anything written in XML.

        The file is parsed with ``xmltodict``. If ``node_list`` is given, it
        is a dotted path of keys (e.g. ``"root.items.item"``) that is walked
        from the top of the parsed document before any records are produced.
        If the object reached that way is a list, each member is yielded
        separately; otherwise the object itself is yielded as a single
        record.

        You can also drill down with a SubrecordFilter, but XML is almost
        never going to be useful at the top level.

        :param input: passed unchanged to ``FileSource.__init__``
        :param node_list: optional dotted path to the node holding the
            records; ``None`` or an empty string means "use the document
            root"
    """

    def __init__(self, input, node_list=None):
        super(XMLSource, self).__init__(input)
        # node_list is optional: only split when a path was actually given.
        # Calling .split() on the default None raised AttributeError, which
        # made the default argument unusable.
        if node_list:
            self.node_list = node_list.split('.')
        else:
            self.node_list = []

    def _process_file(self, f):
        """ Parse ``f`` as XML and yield the selected record(s).

            Raises whatever ``xmltodict.parse`` raises on malformed input,
            and ``KeyError`` / ``TypeError`` from the indexing below when a
            component of ``node_list`` cannot be looked up in the parsed
            document.
        """
        # Imported here rather than at module level -- presumably so that
        # xmltodict is only required when an XMLSource is actually used.
        import xmltodict

        obj = xmltodict.parse(f)

        # Walk down the tree along the requested path; an empty path leaves
        # obj at the document root.
        for node in self.node_list:
            obj = obj[node]

        # If the selected object is a list then yield each element
        # separately; otherwise, yield the object itself.
        if isinstance(obj, list):
            for record in obj:
                yield record
        else:
            yield obj
|
||||||
|
Loading…
Reference in New Issue
Block a user